author     iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2003-02-24 16:59:11 +0000
committer  iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2003-02-24 16:59:11 +0000
commit     0b109673722dd825609e7cc51ca124693f0b8240 (patch)
tree       ba95c49e5ebe85976c365b232c8f48634784cca1 /xen
parent     a48212cb65e09669ed243581556529681cebba0a (diff)
bitkeeper revision 1.94 (3e5a4f5fzVaxemjfCt0N0OH8PYPiuw)
Rename xen-2.4.16 to just "xen" to reflect that it bears no relation to the Linux kernel version.
Diffstat (limited to 'xen')
-rw-r--r--  xen/Makefile  41
-rw-r--r--  xen/README  145
-rw-r--r--  xen/Rules.mk  36
-rw-r--r--  xen/arch/i386/Makefile  17
-rw-r--r--  xen/arch/i386/Rules.mk  14
-rw-r--r--  xen/arch/i386/acpitable.c  549
-rw-r--r--  xen/arch/i386/acpitable.h  260
-rw-r--r--  xen/arch/i386/apic.c  845
-rw-r--r--  xen/arch/i386/boot/boot.S  239
-rw-r--r--  xen/arch/i386/delay.c  29
-rw-r--r--  xen/arch/i386/entry.S  534
-rw-r--r--  xen/arch/i386/extable.c  62
-rw-r--r--  xen/arch/i386/i387.c  56
-rw-r--r--  xen/arch/i386/i8259.c  481
-rw-r--r--  xen/arch/i386/idle0_task.c  20
-rw-r--r--  xen/arch/i386/io_apic.c  1683
-rw-r--r--  xen/arch/i386/ioremap.c  106
-rw-r--r--  xen/arch/i386/irq.c  895
-rw-r--r--  xen/arch/i386/mm.c  141
-rw-r--r--  xen/arch/i386/mpparse.c  944
-rw-r--r--  xen/arch/i386/pci-dma.c  37
-rw-r--r--  xen/arch/i386/pci-i386.c  391
-rw-r--r--  xen/arch/i386/pci-i386.h  69
-rw-r--r--  xen/arch/i386/pci-irq.c  795
-rw-r--r--  xen/arch/i386/pci-pc.c  1494
-rw-r--r--  xen/arch/i386/process.c  418
-rw-r--r--  xen/arch/i386/rwlock.c  33
-rw-r--r--  xen/arch/i386/setup.c  375
-rw-r--r--  xen/arch/i386/smp.c  578
-rw-r--r--  xen/arch/i386/smpboot.c  960
-rw-r--r--  xen/arch/i386/time.c  434
-rw-r--r--  xen/arch/i386/trampoline.S  54
-rw-r--r--  xen/arch/i386/traps.c  696
-rw-r--r--  xen/arch/i386/usercopy.c  190
-rw-r--r--  xen/arch/i386/xeno.lds  87
-rw-r--r--  xen/common/Makefile  8
-rw-r--r--  xen/common/ac_timer.c  335
-rw-r--r--  xen/common/block.c  22
-rw-r--r--  xen/common/brlock.c  69
-rw-r--r--  xen/common/dom0_ops.c  150
-rw-r--r--  xen/common/domain.c  590
-rw-r--r--  xen/common/domain_page.c  67
-rw-r--r--  xen/common/event.c  33
-rw-r--r--  xen/common/kernel.c  519
-rw-r--r--  xen/common/keyhandler.c  130
-rw-r--r--  xen/common/lib.c  526
-rw-r--r--  xen/common/memory.c  820
-rw-r--r--  xen/common/network.c  475
-rw-r--r--  xen/common/page_alloc.c  288
-rw-r--r--  xen/common/perfc.c  81
-rw-r--r--  xen/common/resource.c  332
-rw-r--r--  xen/common/schedule.c  371
-rw-r--r--  xen/common/slab.c  1945
-rw-r--r--  xen/common/softirq.c  332
-rw-r--r--  xen/common/timer.c  603
-rw-r--r--  xen/common/vsprintf.c  713
-rw-r--r--  xen/drivers/Makefile  16
-rw-r--r--  xen/drivers/block/Makefile  8
-rw-r--r--  xen/drivers/block/blkpg.c  315
-rw-r--r--  xen/drivers/block/elevator.c  224
-rw-r--r--  xen/drivers/block/genhd.c  219
-rw-r--r--  xen/drivers/block/ll_rw_blk.c  1461
-rw-r--r--  xen/drivers/block/xen_block.c  402
-rw-r--r--  xen/drivers/char/Makefile  8
-rw-r--r--  xen/drivers/char/xen_kbd.c  191
-rw-r--r--  xen/drivers/char/xen_serial.c  103
-rw-r--r--  xen/drivers/ide/Makefile  8
-rw-r--r--  xen/drivers/ide/ide-disk.c  1555
-rw-r--r--  xen/drivers/ide/ide-dma.c  913
-rw-r--r--  xen/drivers/ide/ide-features.c  384
-rw-r--r--  xen/drivers/ide/ide-geometry.c  222
-rw-r--r--  xen/drivers/ide/ide-pci.c  1016
-rw-r--r--  xen/drivers/ide/ide-probe.c  1066
-rw-r--r--  xen/drivers/ide/ide-taskfile.c  1733
-rw-r--r--  xen/drivers/ide/ide-xeno.c  41
-rw-r--r--  xen/drivers/ide/ide.c  4197
-rw-r--r--  xen/drivers/ide/ide_modes.h  236
-rw-r--r--  xen/drivers/ide/piix.c  542
-rw-r--r--  xen/drivers/net/3c59x.c  2989
-rw-r--r--  xen/drivers/net/Makefile  13
-rw-r--r--  xen/drivers/net/Space.c  44
-rw-r--r--  xen/drivers/net/e1000/LICENSE  339
-rw-r--r--  xen/drivers/net/e1000/Makefile  39
-rw-r--r--  xen/drivers/net/e1000/e1000.h  208
-rw-r--r--  xen/drivers/net/e1000/e1000_ethtool.c  611
-rw-r--r--  xen/drivers/net/e1000/e1000_hw.c  3610
-rw-r--r--  xen/drivers/net/e1000/e1000_hw.h  1789
-rw-r--r--  xen/drivers/net/e1000/e1000_main.c  2279
-rw-r--r--  xen/drivers/net/e1000/e1000_osdep.h  112
-rw-r--r--  xen/drivers/net/e1000/e1000_param.c  655
-rw-r--r--  xen/drivers/net/ne/8390.c  1158
-rw-r--r--  xen/drivers/net/ne/8390.h  197
-rw-r--r--  xen/drivers/net/ne/Makefile  8
-rw-r--r--  xen/drivers/net/ne/ne.c  685
-rw-r--r--  xen/drivers/net/net_init.c  732
-rw-r--r--  xen/drivers/net/setup.c  173
-rw-r--r--  xen/drivers/net/tg3.c  6884
-rw-r--r--  xen/drivers/net/tg3.h  1893
-rw-r--r--  xen/drivers/pci/Makefile  44
-rw-r--r--  xen/drivers/pci/compat.c  65
-rw-r--r--  xen/drivers/pci/gen-devlist.c  130
-rw-r--r--  xen/drivers/pci/names.c  135
-rw-r--r--  xen/drivers/pci/pci.c  2217
-rw-r--r--  xen/drivers/pci/pci.ids  6778
-rw-r--r--  xen/drivers/pci/proc.c  572
-rw-r--r--  xen/drivers/pci/quirks.c  666
-rw-r--r--  xen/drivers/pci/setup-bus.c  400
-rw-r--r--  xen/drivers/pci/setup-irq.c  71
-rw-r--r--  xen/drivers/pci/setup-res.c  241
-rw-r--r--  xen/drivers/pci/syscall.c  144
-rw-r--r--  xen/drivers/scsi/Makefile  11
-rw-r--r--  xen/drivers/scsi/aacraid/Makefile  17
-rw-r--r--  xen/drivers/scsi/aacraid/README  42
-rw-r--r--  xen/drivers/scsi/aacraid/TODO  4
-rw-r--r--  xen/drivers/scsi/aacraid/aachba.c  1685
-rw-r--r--  xen/drivers/scsi/aacraid/aacraid.h  1420
-rw-r--r--  xen/drivers/scsi/aacraid/commctrl.c  438
-rw-r--r--  xen/drivers/scsi/aacraid/comminit.c  350
-rw-r--r--  xen/drivers/scsi/aacraid/commsup.c  1028
-rw-r--r--  xen/drivers/scsi/aacraid/dpcsup.c  207
-rw-r--r--  xen/drivers/scsi/aacraid/linit.c  794
-rw-r--r--  xen/drivers/scsi/aacraid/rx.c  457
-rw-r--r--  xen/drivers/scsi/aacraid/sa.c  406
-rw-r--r--  xen/drivers/scsi/constants.c  1005
-rw-r--r--  xen/drivers/scsi/constants.h  6
-rw-r--r--  xen/drivers/scsi/hosts.c  316
-rw-r--r--  xen/drivers/scsi/hosts.h  591
-rw-r--r--  xen/drivers/scsi/scsi.c  2999
-rw-r--r--  xen/drivers/scsi/scsi.h  896
-rw-r--r--  xen/drivers/scsi/scsi_dma.c  455
-rw-r--r--  xen/drivers/scsi/scsi_error.c  2063
-rw-r--r--  xen/drivers/scsi/scsi_ioctl.c  538
-rw-r--r--  xen/drivers/scsi/scsi_lib.c  1201
-rw-r--r--  xen/drivers/scsi/scsi_merge.c  1181
-rw-r--r--  xen/drivers/scsi/scsi_module.c.inc  71
-rw-r--r--  xen/drivers/scsi/scsi_obsolete.h  106
-rw-r--r--  xen/drivers/scsi/scsi_proc.c  329
-rw-r--r--  xen/drivers/scsi/scsi_queue.c  151
-rw-r--r--  xen/drivers/scsi/scsi_scan.c  906
-rw-r--r--  xen/drivers/scsi/scsi_syms.c  105
-rw-r--r--  xen/drivers/scsi/scsicam.c  236
-rw-r--r--  xen/drivers/scsi/sd.c  1512
-rw-r--r--  xen/drivers/scsi/sd.h  66
-rw-r--r--  xen/include/asm-i386/apic.h  96
-rw-r--r--  xen/include/asm-i386/apicdef.h  378
-rw-r--r--  xen/include/asm-i386/atomic.h  204
-rw-r--r--  xen/include/asm-i386/bitops.h  368
-rw-r--r--  xen/include/asm-i386/byteorder.h  47
-rw-r--r--  xen/include/asm-i386/cache.h  13
-rw-r--r--  xen/include/asm-i386/cpufeature.h  76
-rw-r--r--  xen/include/asm-i386/current.h  15
-rw-r--r--  xen/include/asm-i386/debugreg.h  64
-rw-r--r--  xen/include/asm-i386/delay.h  14
-rw-r--r--  xen/include/asm-i386/desc.h  32
-rw-r--r--  xen/include/asm-i386/dma.h  301
-rw-r--r--  xen/include/asm-i386/domain_page.h  43
-rw-r--r--  xen/include/asm-i386/elf.h  233
-rw-r--r--  xen/include/asm-i386/fixmap.h  107
-rw-r--r--  xen/include/asm-i386/flushtlb.h  48
-rw-r--r--  xen/include/asm-i386/hardirq.h  88
-rw-r--r--  xen/include/asm-i386/hdreg.h  12
-rw-r--r--  xen/include/asm-i386/i387.h  39
-rw-r--r--  xen/include/asm-i386/ide.h  128
-rw-r--r--  xen/include/asm-i386/io.h  253
-rw-r--r--  xen/include/asm-i386/io_apic.h  148
-rw-r--r--  xen/include/asm-i386/ioctl.h  75
-rw-r--r--  xen/include/asm-i386/irq.h  203
-rw-r--r--  xen/include/asm-i386/mc146818rtc.h  113
-rw-r--r--  xen/include/asm-i386/mpspec.h  224
-rw-r--r--  xen/include/asm-i386/msr.h  104
-rw-r--r--  xen/include/asm-i386/page.h  175
-rw-r--r--  xen/include/asm-i386/param.h  24
-rw-r--r--  xen/include/asm-i386/pci.h  286
-rw-r--r--  xen/include/asm-i386/pgalloc.h  117
-rw-r--r--  xen/include/asm-i386/processor.h  501
-rw-r--r--  xen/include/asm-i386/ptrace.h  86
-rw-r--r--  xen/include/asm-i386/rwlock.h  83
-rw-r--r--  xen/include/asm-i386/scatterlist.h  16
-rw-r--r--  xen/include/asm-i386/smp.h  92
-rw-r--r--  xen/include/asm-i386/smpboot.h  121
-rw-r--r--  xen/include/asm-i386/softirq.h  48
-rw-r--r--  xen/include/asm-i386/spinlock.h  206
-rw-r--r--  xen/include/asm-i386/system.h  354
-rw-r--r--  xen/include/asm-i386/time.h  49
-rw-r--r--  xen/include/asm-i386/timex.h  58
-rw-r--r--  xen/include/asm-i386/types.h  50
-rw-r--r--  xen/include/asm-i386/uaccess.h  600
-rw-r--r--  xen/include/asm-i386/unaligned.h  37
-rw-r--r--  xen/include/hypervisor-ifs/block.h  78
-rw-r--r--  xen/include/hypervisor-ifs/hypervisor-if.h  209
-rw-r--r--  xen/include/hypervisor-ifs/network.h  131
-rw-r--r--  xen/include/scsi/scsi.h  237
-rw-r--r--  xen/include/scsi/scsi_ioctl.h  51
-rw-r--r--  xen/include/scsi/scsicam.h  19
-rw-r--r--  xen/include/scsi/sg.h  330
-rw-r--r--  xen/include/stdarg.h  138
-rw-r--r--  xen/include/xeno/ac_timer.h  65
-rw-r--r--  xen/include/xeno/blk.h  409
-rw-r--r--  xen/include/xeno/blkdev.h  371
-rw-r--r--  xen/include/xeno/blkpg.h  64
-rw-r--r--  xen/include/xeno/block.h  11
-rw-r--r--  xen/include/xeno/brlock.h  220
-rw-r--r--  xen/include/xeno/byteorder/big_endian.h  68
-rw-r--r--  xen/include/xeno/byteorder/generic.h  180
-rw-r--r--  xen/include/xeno/byteorder/little_endian.h  68
-rw-r--r--  xen/include/xeno/byteorder/pdp_endian.h  88
-rw-r--r--  xen/include/xeno/byteorder/swab.h  190
-rw-r--r--  xen/include/xeno/byteorder/swabb.h  137
-rw-r--r--  xen/include/xeno/cache.h  37
-rw-r--r--  xen/include/xeno/config.h  136
-rw-r--r--  xen/include/xeno/ctype.h  54
-rw-r--r--  xen/include/xeno/delay.h  10
-rw-r--r--  xen/include/xeno/dom0_ops.h  63
-rw-r--r--  xen/include/xeno/elevator.h  104
-rw-r--r--  xen/include/xeno/errno.h  132
-rw-r--r--  xen/include/xeno/etherdevice.h  68
-rw-r--r--  xen/include/xeno/ethtool.h  361
-rw-r--r--  xen/include/xeno/event.h  101
-rw-r--r--  xen/include/xeno/genhd.h  313
-rw-r--r--  xen/include/xeno/hdreg.h  662
-rw-r--r--  xen/include/xeno/hdsmart.h  124
-rw-r--r--  xen/include/xeno/ide.h  1105
-rw-r--r--  xen/include/xeno/if.h  141
-rw-r--r--  xen/include/xeno/if_ether.h  100
-rw-r--r--  xen/include/xeno/if_packet.h  102
-rw-r--r--  xen/include/xeno/if_vlan.h  256
-rw-r--r--  xen/include/xeno/in.h  191
-rw-r--r--  xen/include/xeno/init.h  170
-rw-r--r--  xen/include/xeno/interrupt.h  258
-rw-r--r--  xen/include/xeno/ioctl.h  7
-rw-r--r--  xen/include/xeno/ioport.h  121
-rw-r--r--  xen/include/xeno/irq.h  63
-rw-r--r--  xen/include/xeno/irq_cpustat.h  34
-rw-r--r--  xen/include/xeno/kdev_t.h  123
-rw-r--r--  xen/include/xeno/kernel.h  37
-rw-r--r--  xen/include/xeno/keyhandler.h  16
-rw-r--r--  xen/include/xeno/lib.h  51
-rw-r--r--  xen/include/xeno/list.h  160
-rw-r--r--  xen/include/xeno/major.h  199
-rw-r--r--  xen/include/xeno/mii.h  165
-rw-r--r--  xen/include/xeno/mm.h  142
-rw-r--r--  xen/include/xeno/module.h  417
-rw-r--r--  xen/include/xeno/multiboot.h  81
-rw-r--r--  xen/include/xeno/netdevice.h  604
-rw-r--r--  xen/include/xeno/notifier.h  64
-rw-r--r--  xen/include/xeno/pci.h  807
-rw-r--r--  xen/include/xeno/pci_ids.h  1856
-rw-r--r--  xen/include/xeno/perfc.h  43
-rw-r--r--  xen/include/xeno/perfc_defn.h  4
-rw-r--r--  xen/include/xeno/prefetch.h  60
-rw-r--r--  xen/include/xeno/reboot.h  51
-rw-r--r--  xen/include/xeno/sched.h  224
-rw-r--r--  xen/include/xeno/skbuff.h  434
-rw-r--r--  xen/include/xeno/slab.h  84
-rw-r--r--  xen/include/xeno/smp.h  88
-rw-r--r--  xen/include/xeno/socket.h  136
-rw-r--r--  xen/include/xeno/sockios.h  132
-rw-r--r--  xen/include/xeno/spinlock.h  142
-rw-r--r--  xen/include/xeno/time.h  98
-rw-r--r--  xen/include/xeno/timer.h  81
-rw-r--r--  xen/include/xeno/timex.h  291
-rw-r--r--  xen/include/xeno/tqueue.h  125
-rw-r--r--  xen/include/xeno/types.h  50
-rw-r--r--  xen/include/xeno/vif.h  94
-rw-r--r--  xen/net/Makefile  8
-rw-r--r--  xen/net/dev.c  2019
-rw-r--r--  xen/net/dev_mcast.c  276
-rw-r--r--  xen/net/devinit.c  109
-rw-r--r--  xen/net/eth.c  252
-rw-r--r--  xen/net/skbuff.c  501
-rw-r--r--  xen/tools/Makefile  6
-rw-r--r--  xen/tools/elf-reloc.c  118
272 files changed, 118704 insertions, 0 deletions
diff --git a/xen/Makefile b/xen/Makefile
new file mode 100644
index 0000000000..3bd4299075
--- /dev/null
+++ b/xen/Makefile
@@ -0,0 +1,41 @@
+
+export BASEDIR := $(shell pwd)
+
+include Rules.mk
+
+default: $(TARGET)
+ gzip -f -9 < $(TARGET) > $(TARGET).gz
+# objdump -D -S image >image.s
+
+install: $(TARGET)
+ gzip -f -9 < $(TARGET) > $(TARGET).gz
+ cp $(TARGET).gz ../../install/images/image
+
+clean: delete-links
+ $(MAKE) -C tools clean
+ $(MAKE) -C common clean
+ $(MAKE) -C net clean
+ $(MAKE) -C drivers clean
+ $(MAKE) -C arch/$(ARCH) clean
+ rm -f *.o $(TARGET)* *~ core
+
+$(TARGET): make-links
+ $(MAKE) -C tools
+ $(MAKE) -C common
+ $(MAKE) -C net
+ $(MAKE) -C drivers
+ $(MAKE) -C arch/$(ARCH)
+
+make-links:
+ ln -sf xeno include/linux
+ ln -sf asm-$(ARCH) include/asm
+
+delete-links:
+ rm -f include/linux include/asm
+
+SUBDIRS = arch common drivers net
+TAGS:
+ etags `find include/asm-$(ARCH) -name '*.h'`
+ find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs etags -a
+ find $(SUBDIRS) -name '*.[ch]' | xargs etags -a
+
diff --git a/xen/README b/xen/README
new file mode 100644
index 0000000000..3518b8254a
--- /dev/null
+++ b/xen/README
@@ -0,0 +1,145 @@
+
+*****************************************************
+ Xeno Hypervisor (18/7/02)
+
+1) Tree layout
+Looks rather like a simplified Linux :-)
+Headers are in include/xeno and include/asm-<arch>.
+At build time we create symlinks:
+ include/linux -> include/xeno
+ include/asm -> include/asm-<arch>
+In this way, Linux device drivers should need less tweaking of
+their #include lines.
+
+For source files, mapping between hypervisor and Linux is:
+ Linux Hypervisor
+ ----- ----------
+ kernel/init/mm/lib -> common
+ net/* -> net/*
+ drivers/* -> drivers/*
+ arch/* -> arch/*
+
+Note that the use of #include <asm/...> and #include <linux/...> can
+lead to confusion, as such files will often exist on the system include
+path, even if a version doesn't exist within the hypervisor tree.
+Unfortunately '-nostdinc' cannot be specified to the compiler, as that
+prevents us using stdarg.h in the compiler's own header directory.
+
+We try to modify things in drivers/* as little as possible, so we can
+easily take updates from Linux. arch/* is basically straight from
+Linux, with fingers in Linux-specific pies hacked off. common/* has
+a lot of Linux code in it, but certain subsystems (task maintenance,
+low-level memory handling) have been replaced. net/* contains enough
+Linux-like gloop to get network drivers to work with little/no
+modification.
+
+2) Building
+'make': Builds ELF executable called 'image' in base directory
+'make install': gzip-compresses 'image' and copies it to TFTP server
+'make clean': removes *all* build and target files
+
+
+*****************************************************
+Random thoughts and stuff from here down...
+
+Todo list
+---------
+* Hypervisor need only directly map its own memory pool
+ (maybe 128MB, tops). That would need 0x08000000....
+ This would allow 512MB Linux with plenty room for vmalloc'ed areas.
+* Network device -- port drivers to hypervisor, implement virtual
+ driver for xeno-linux. Looks like Ethernet.
+ -- Hypervisor needs to do (at a minimum):
+ - packet filtering on tx (unicast IP only)
+ - packet demux on rx (unicast IP only)
+ - provide DHCP [maybe do something simpler?]
+ and ARP [at least for hypervisor IP address]
+
+
+Segment descriptor tables
+-------------------------
+We want to allow guest OSes to specify GDT and LDT tables using their
+own pages of memory (just like with page tables). So allow the following:
+ * new_table_entry(ptr, val)
+ [Allows insertion of a code, data, or LDT descriptor into given
+ location. Can simply be checked then poked, with no need to look at
+ page type.]
+ * new_GDT() -- relevant virtual pages are resolved to frames. Either
+ (i) page not present; or (ii) page is only mapped read-only and checks
+ out okay (then marked as special page). Old table is resolved first,
+ and the pages are unmarked (no longer special type).
+ * new_LDT() -- same as for new_GDT(), with same special page type.
+
+Page table updates must be hooked, so we look for updates to virtual page
+addresses in the GDT/LDT range. If a mapping is made not-present, the old
+physpage has its type_count decremented. If a mapping is made present, ensure
+it is read-only, check the page, and set the special type.
+
+Merge set_{LDT,GDT} into update_baseptrs, by passing four args:
+ update_baseptrs(mask, ptab, gdttab, ldttab);
+Update of ptab requires update of gtab (or set to internal default).
+Update of gtab requires update of ltab (or set to internal default). (A
+descriptor-checking sketch follows this file's diff.)
+
+
+The hypervisor page cache
+-------------------------
+This will allow guest OSes to make use of spare pages in the system, but
+allow them to be immediately used for any new domains or memory requests.
+The idea is that, when a page is laundered and falls off Linux's clean_LRU
+list, rather than being freed it becomes a candidate for passing down into
+the hypervisor. In return, xeno-linux may ask for one of its previously-
+cached pages back:
+ (page, new_id) = cache_query(page, old_id);
+If the requested page couldn't be kept, a blank page is returned.
+When would Linux make the query? Whenever it wants a page back without
+the delay of going to disc. Also, whenever a page would otherwise be
+flushed to disc.
+
+To try and add to the cache: (blank_page, new_id) = cache_query(page, NULL);
+ [NULL means "give me a blank page"].
+To try and retrieve from the cache: (page, new_id) = cache_query(x_page, id)
+ [we may request that x_page just be discarded, and therefore not impinge
+ on this domain's cache quota]. (A toy model follows this file's diff.)
+
+
+Booting secondary processors
+----------------------------
+
+start_of_day (i386/setup.c)
+smp_boot_cpus (i386/smpboot.c)
+ * initialises boot CPU data
+ * parses APIC tables
+ * for each cpu:
+ do_boot_cpu (i386/smpboot.c)
+ * forks a new idle process
+ * points initial stack inside new task struct
+ * points initial EIP at a trampoline in very low memory
+ * frobs remote APIC....
+
+On other processor:
+ * trampoline sets GDT and IDT
+ * jumps to the main boot address with a magic register value
+ * after setting proper page and descriptor tables, jumps to...
+ initialize_secondary (i386/smpboot.c)
+ * simply reads ESP/EIP out of the (new) idle task
+ * this causes a jump to...
+ start_secondary (i386/smpboot.c)
+ * reset all processor state
+ * barrier, then write bitmasks to signal back to boot cpu
+ * then barrel into...
+ cpu_idle (i386/process.c)
+ [THIS IS PROBABLY REASONABLE -- BOOT CPU SHOULD KICK
+ SECONDARIES TO GET WORK DONE]
+
+
+SMP capabilities
+----------------
+
+Current intention is to allow hypervisor to schedule on all processors in
+SMP boxen, but to tie each domain to a single processor. This simplifies
+many SMP intricacies both in terms of correctness and efficiency (e.g.
+TLB flushing, network packet delivery, ...).
+
+Clients can still make use of SMP by installing multiple domains on a single
+machine, and treating it as a fast cluster (at the very least, the
+hypervisor will have fast routing of locally-destined packets).
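
The segment-descriptor proposal in the README above hinges on being able to
check a guest-supplied descriptor before poking it into a GDT/LDT page. The
following toy, user-space illustration uses the architectural x86 segment
descriptor layout, but the policy shown (refuse present system/gate
descriptors and ring-0 entries) is only a plausible guess at what
new_table_entry() would enforce; none of this code appears in the commit.

    #include <stdio.h>
    #include <stdint.h>

    /* Return non-zero if a guest may install descriptor 'd'. */
    static int descriptor_is_safe(uint64_t d)
    {
        uint32_t hi = (uint32_t)(d >> 32);    /* flags live in the high word */

        if (!(hi & (1u << 15)))    /* P=0: not-present entries are harmless */
            return 1;
        if (!(hi & (1u << 12)))    /* S=0: system segment or gate -- refuse */
            return 0;
        if (((hi >> 13) & 3) == 0) /* DPL=0 would outrank the hypervisor    */
            return 0;
        return 1;                  /* ring 1-3 code/data looks fine         */
    }

    int main(void)
    {
        uint64_t ring1_code = 0x00cfba000000ffffULL; /* flat ring-1 code */
        uint64_t ring0_code = 0x00cf9a000000ffffULL; /* flat ring-0 code */
        printf("ring1 ok=%d, ring0 ok=%d\n",
               descriptor_is_safe(ring1_code), descriptor_is_safe(ring0_code));
        return 0;
    }

Compiled stand-alone, this prints "ring1 ok=1, ring0 ok=0".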
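
The page-cache section likewise names a cache_query() call without pinning
down its shape. A minimal user-space model of the described protocol --
offer a clean page down, get a blank page and an id back, later try to
redeem the id -- might behave as below; all names are hypothetical.

    #include <stdio.h>

    #define SLOTS 4
    static int cache[SLOTS];            /* toy "pages"; 0 means empty slot */

    /* id == 0: offer 'page' to the cache, receive a blank page and a new
     * id. id != 0: try to redeem a previously cached page; the slot may
     * have been reclaimed, in which case a blank (0) page comes back. */
    static int cache_query(int page, int id, int *new_id)
    {
        if (id == 0) {
            for (int i = 0; i < SLOTS; i++) {
                if (cache[i] == 0) {
                    cache[i] = page;
                    *new_id = i + 1;
                    return 0;           /* blank page in exchange */
                }
            }
            *new_id = 0;
            return page;                /* cache full: keep your page */
        }
        int got = cache[id - 1];        /* may already be 0 (reclaimed) */
        cache[id - 1] = 0;
        *new_id = 0;
        return got;
    }

    int main(void)
    {
        int id;
        int blank = cache_query(42, 0, &id);
        printf("offered page 42: got blank=%d, id=%d\n", blank, id);
        int page = cache_query(0, id, &id);
        printf("redeemed: page=%d\n", page);
        return 0;
    }

Offering page 42 returns a blank page plus an id; redeeming the id later may
return the page, or a blank page if the slot was reclaimed in the meantime.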
diff --git a/xen/Rules.mk b/xen/Rules.mk
new file mode 100644
index 0000000000..13a57ed550
--- /dev/null
+++ b/xen/Rules.mk
@@ -0,0 +1,36 @@
+
+ARCH := i386
+
+TARGET := $(BASEDIR)/image
+HDRS := $(wildcard $(BASEDIR)/include/xeno/*.h)
+HDRS += $(wildcard $(BASEDIR)/include/scsi/*.h)
+HDRS += $(wildcard $(BASEDIR)/include/hypervisor-ifs/*.h)
+HDRS += $(wildcard $(BASEDIR)/include/asm-$(ARCH)/*.h)
+
+C_SRCS := $(wildcard *.c)
+S_SRCS := $(wildcard *.S)
+OBJS := $(patsubst %.S,%.o,$(S_SRCS))
+OBJS += $(patsubst %.c,%.o,$(C_SRCS))
+
+# Note that link order matters!
+ALL_OBJS := $(BASEDIR)/common/common.o
+ALL_OBJS += $(BASEDIR)/net/network.o
+ALL_OBJS += $(BASEDIR)/drivers/char/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/pci/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/net/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/block/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/ide/driver.o
+#ALL_OBJS += $(BASEDIR)/drivers/scsi/driver.o
+ALL_OBJS += $(BASEDIR)/arch/$(ARCH)/arch.o
+
+HOSTCC = gcc
+HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+
+include $(BASEDIR)/arch/$(ARCH)/Rules.mk
+
+%.o: %.c $(HDRS) Makefile
+ $(CC) -g $(CFLAGS) -c $< -o $@
+
+%.o: %.S $(HDRS) Makefile
+ $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
+
diff --git a/xen/arch/i386/Makefile b/xen/arch/i386/Makefile
new file mode 100644
index 0000000000..6778324206
--- /dev/null
+++ b/xen/arch/i386/Makefile
@@ -0,0 +1,17 @@
+
+include $(BASEDIR)/Rules.mk
+
+# What happens here? We link monitor object files together, starting
+# at MONITOR_BASE (a very high address). But the bootloader cannot put
+# things there, so we initially load at LOAD_BASE. A hacky little
+# tool called `elf-reloc' is used to modify segment offsets from
+# MONITOR_BASE-relative to LOAD_BASE-relative.
+# (NB. Linux gets round this by turning its image into raw binary, then
+# wrapping that with a low-memory bootstrapper.)
+default: boot/boot.o $(OBJS)
+ $(LD) -r -o arch.o $(OBJS)
+ $(LD) $(LDFLAGS) boot/boot.o $(ALL_OBJS) -o $(TARGET)
+ $(BASEDIR)/tools/elf-reloc $(MONITOR_BASE) $(LOAD_BASE) $(TARGET)
+
+clean:
+ rm -f *.o *~ core boot/*.o boot/*~ boot/core
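
The comment in this Makefile explains the MONITOR_BASE/LOAD_BASE trick;
the bundled tools/elf-reloc (counted in the diffstat above) performs the
fixup. As a rough reconstruction of what such a tool has to do -- and
explicitly not the actual elf-reloc.c -- one could rebase the physical
load address of each loadable segment while leaving the virtual (link)
addresses alone:

    #include <elf.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
        if (argc != 4) {
            fprintf(stderr, "usage: %s <link_base> <load_base> <image>\n",
                    argv[0]);
            return 1;
        }

        unsigned long link_base = strtoul(argv[1], NULL, 0);
        unsigned long load_base = strtoul(argv[2], NULL, 0);

        FILE *f = fopen(argv[3], "r+b");
        if (f == NULL) { perror("fopen"); return 1; }

        Elf32_Ehdr eh;
        if (fread(&eh, sizeof(eh), 1, f) != 1) { perror("fread"); return 1; }

        /* Rebase the physical address of every loadable segment. */
        for (int i = 0; i < eh.e_phnum; i++) {
            long pos = (long)eh.e_phoff + (long)i * eh.e_phentsize;
            Elf32_Phdr ph;

            fseek(f, pos, SEEK_SET);
            if (fread(&ph, sizeof(ph), 1, f) != 1) { perror("fread"); return 1; }
            if (ph.p_type != PT_LOAD)
                continue;
            ph.p_paddr = ph.p_vaddr - link_base + load_base;
            fseek(f, pos, SEEK_SET);
            if (fwrite(&ph, sizeof(ph), 1, f) != 1) { perror("fwrite"); return 1; }
        }

        fclose(f);
        return 0;
    }

Invoked as in the rule above, this would run as
elf-reloc 0xFC500000 0x00100000 image.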
diff --git a/xen/arch/i386/Rules.mk b/xen/arch/i386/Rules.mk
new file mode 100644
index 0000000000..8a672e228a
--- /dev/null
+++ b/xen/arch/i386/Rules.mk
@@ -0,0 +1,14 @@
+########################################
+# x86-specific definitions
+
+CC := gcc
+LD := ld
+# Linker should relocate monitor to this address
+MONITOR_BASE := 0xFC500000
+# Bootloader should load monitor to this real address
+LOAD_BASE := 0x00100000
+CFLAGS := -nostdinc -fno-builtin -O3 -Wall -DMONITOR_BASE=$(MONITOR_BASE)
+CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -DNDEBUG
+LDFLAGS := -T xeno.lds -N
+
+
diff --git a/xen/arch/i386/acpitable.c b/xen/arch/i386/acpitable.c
new file mode 100644
index 0000000000..1078db3eee
--- /dev/null
+++ b/xen/arch/i386/acpitable.c
@@ -0,0 +1,549 @@
+/*
+ * acpitable.c - IA32-specific ACPI boot-time initialization (Revision: 1)
+ *
+ * Copyright (C) 1999 Andrew Henroid
+ * Copyright (C) 2001 Richard Schaal
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2001 Arjan van de Ven <arjanv@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * $Id: acpitable.c,v 1.7 2001/11/04 12:21:18 fenrus Exp $
+ */
+#include <xeno/config.h>
+#include <xeno/kernel.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+/*#include <xeno/stddef.h>*/
+#include <xeno/slab.h>
+#include <xeno/pci.h>
+#include <asm/mpspec.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+/*#include <asm/pgtable.h>*/
+
+#include "acpitable.h"
+
+static acpi_table_handler acpi_boot_ops[ACPI_TABLE_COUNT];
+
+
+static unsigned char __init
+acpi_checksum(void *buffer, int length)
+{
+ int i;
+ unsigned char *bytebuffer;
+ unsigned char sum = 0;
+
+ if (!buffer || length <= 0)
+ return 0;
+
+ bytebuffer = (unsigned char *) buffer;
+
+ for (i = 0; i < length; i++)
+ sum += *(bytebuffer++);
+
+ return sum;
+}
+
+static void __init
+acpi_print_table_header(acpi_table_header * header)
+{
+ if (!header)
+ return;
+
+ printk(KERN_INFO "ACPI table found: %.4s v%d [%.6s %.8s %d.%d]\n",
+ header->signature, header->revision, header->oem_id,
+ header->oem_table_id, header->oem_revision >> 16,
+ header->oem_revision & 0xffff);
+
+ return;
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_tb_scan_memory_for_rsdp
+ *
+ * PARAMETERS: address - Starting pointer for search
+ * length - Maximum length to search
+ *
+ * RETURN: Pointer to the RSDP if found and valid, otherwise NULL.
+ *
+ * DESCRIPTION: Search a block of memory for the RSDP signature
+ *
+ ******************************************************************************/
+
+static void *__init
+acpi_tb_scan_memory_for_rsdp(void *address, int length)
+{
+ u32 offset;
+
+ if (length <= 0)
+ return NULL;
+
+ /* Search from given start addr for the requested length */
+
+ offset = 0;
+
+ while (offset < length) {
+ /* The signature must match and the checksum must be correct */
+ if (strncmp(address, RSDP_SIG, sizeof(RSDP_SIG) - 1) == 0 &&
+ acpi_checksum(address, RSDP_CHECKSUM_LENGTH) == 0) {
+ /* If so, we have found the RSDP */
+ printk(KERN_INFO "ACPI: RSDP located at physical address %p\n",
+ address);
+ return address;
+ }
+ offset += RSDP_SCAN_STEP;
+ address += RSDP_SCAN_STEP;
+ }
+
+ /* Searched entire block, no RSDP was found */
+ printk(KERN_INFO "ACPI: Searched entire block, no RSDP was found.\n");
+ return NULL;
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_find_root_pointer
+ *
+ * PARAMETERS: none
+ *
+ * RETURN: physical address of the RSDP
+ *
+ * DESCRIPTION: Search the lower 1MB of memory for the root system descriptor
+ * pointer structure. If it is found, set *RSDP to point to it.
+ *
+ * NOTE: The RSDP must be either in the first 1KB of the Extended
+ * BIOS Data Area or between E0000 and FFFFF (ACPI 1.0 section
+ * 5.2.2; assertion #421).
+ *
+ ******************************************************************************/
+
+static struct acpi_table_rsdp * __init
+acpi_find_root_pointer(void)
+{
+ struct acpi_table_rsdp * rsdp;
+
+ /*
+ * Physical address is given
+ */
+ /*
+ * Region 1) Search EBDA (low memory) paragraphs
+ */
+ rsdp = acpi_tb_scan_memory_for_rsdp(__va(LO_RSDP_WINDOW_BASE),
+ LO_RSDP_WINDOW_SIZE);
+
+ if (rsdp)
+ return rsdp;
+
+ /*
+ * Region 2) Search upper memory: 16-byte boundaries in E0000h-FFFFFh
+ */
+ rsdp = acpi_tb_scan_memory_for_rsdp(__va(HI_RSDP_WINDOW_BASE),
+ HI_RSDP_WINDOW_SIZE);
+
+
+
+ if (rsdp)
+ return rsdp;
+
+ printk(KERN_ERR "ACPI: System description tables not found\n");
+ return NULL;
+}
+
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __va_range() call.
+ *
+ * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+static __init char *
+__va_range(unsigned long phys, unsigned long size)
+{
+ unsigned long base, offset, mapped_size;
+ int idx;
+
+ offset = phys & (PAGE_SIZE - 1);
+ mapped_size = PAGE_SIZE - offset;
+ set_fixmap(FIX_IO_APIC_BASE_END, phys);
+ base = fix_to_virt(FIX_IO_APIC_BASE_END);
+ dprintk("__va_range(0x%lx, 0x%lx): idx=%d mapped at %lx\n", phys, size,
+ FIX_IO_APIC_BASE_END, base);
+
+ /*
+ * Most cases can be covered by the below.
+ */
+ idx = FIX_IO_APIC_BASE_END;
+ while (mapped_size < size) {
+ if (--idx < FIX_IO_APIC_BASE_0)
+ return NULL; /* cannot handle this */
+ phys += PAGE_SIZE;
+ set_fixmap(idx, phys);
+ mapped_size += PAGE_SIZE;
+ }
+
+ return ((char *) base + offset);
+}
+
+static int __init acpi_tables_init(void)
+{
+ int result = -ENODEV;
+ acpi_table_header *header = NULL;
+ struct acpi_table_rsdp *rsdp = NULL;
+ struct acpi_table_rsdt *rsdt = NULL;
+ struct acpi_table_rsdt saved_rsdt;
+ int tables = 0;
+ int type = 0;
+ int i = 0;
+
+
+ rsdp = (struct acpi_table_rsdp *) acpi_find_root_pointer();
+
+ if (!rsdp)
+ return -ENODEV;
+
+ printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
+ rsdp->oem_id);
+
+ if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) {
+ printk(KERN_WARNING "RSDP table signature incorrect\n");
+ return -EINVAL;
+ }
+
+ rsdt = (struct acpi_table_rsdt *)
+ __va_range(rsdp->rsdt_address, sizeof(struct acpi_table_rsdt));
+
+ if (!rsdt) {
+ printk(KERN_WARNING "ACPI: Invalid root system description tables (RSDT)\n");
+ return -ENODEV;
+ }
+
+ header = & rsdt->header;
+ acpi_print_table_header(header);
+
+ if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) {
+ printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
+ return -ENODEV;
+ }
+
+ /*
+ * The number of tables is computed by taking the
+ * size of all entries (total size of RSDT minus
+ * header size) divided by the size of each entry
+ * (4-byte table pointers).
+ */
+ tables = (header->length - sizeof(acpi_table_header)) / 4;
+
+ memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
+
+ if (saved_rsdt.header.length > sizeof(saved_rsdt)) {
+ printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", saved_rsdt.header.length);
+ return -ENODEV;
+ }
+
+ for (i = 0; i < tables; i++) {
+ /* Map in header, then map in full table length. */
+ header = (acpi_table_header *)
+ __va_range(saved_rsdt.entry[i],
+ sizeof(acpi_table_header));
+ if (!header)
+ break;
+ header = (acpi_table_header *)
+ __va_range(saved_rsdt.entry[i], header->length);
+ if (!header)
+ break;
+
+ acpi_print_table_header(header);
+
+ if (acpi_checksum(header,header->length)) {
+ printk(KERN_WARNING "ACPI %s has invalid checksum\n",
+ acpi_table_signatures[i]);
+ continue;
+ }
+
+ for (type = 0; type < ACPI_TABLE_COUNT; type++)
+ if (!strncmp((char *) &header->signature,
+ acpi_table_signatures[type],strlen(acpi_table_signatures[type])))
+ break;
+
+ if (type >= ACPI_TABLE_COUNT) {
+ printk(KERN_WARNING "ACPI: Unsupported table %.4s\n",
+ header->signature);
+ continue;
+ }
+
+
+ if (!acpi_boot_ops[type])
+ continue;
+
+ result = acpi_boot_ops[type](header,
+ (unsigned long)
+ saved_rsdt.entry[i]);
+ }
+
+ return result;
+}
+
+static int total_cpus __initdata = 0;
+int have_acpi_tables;
+
+extern void __init MP_processor_info(struct mpc_config_processor *);
+
+static void __init
+acpi_parse_lapic(struct acpi_table_lapic *local_apic)
+{
+ struct mpc_config_processor proc_entry;
+ int ix = 0;
+
+ if (!local_apic)
+ return;
+
+ printk(KERN_INFO "LAPIC (acpi_id[0x%04x] id[0x%x] enabled[%d])\n",
+ local_apic->acpi_id, local_apic->id, local_apic->flags.enabled);
+
+ printk(KERN_INFO "CPU %d (0x%02x00)", total_cpus, local_apic->id);
+
+ if (local_apic->flags.enabled) {
+ printk(" enabled");
+ ix = local_apic->id;
+ if (ix >= MAX_APICS) {
+ printk(KERN_WARNING
+ "Processor #%d INVALID - (Max ID: %d).\n", ix,
+ MAX_APICS);
+ return;
+ }
+ /*
+ * Fill in the info we want to save. Not concerned about
+ * the processor ID. Processor features aren't present in
+ * the table.
+ */
+ proc_entry.mpc_type = MP_PROCESSOR;
+ proc_entry.mpc_apicid = local_apic->id;
+ proc_entry.mpc_cpuflag = CPU_ENABLED;
+ if (proc_entry.mpc_apicid == boot_cpu_physical_apicid) {
+ printk(" (BSP)");
+ proc_entry.mpc_cpuflag |= CPU_BOOTPROCESSOR;
+ }
+ proc_entry.mpc_cpufeature =
+ (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) |
+ boot_cpu_data.x86_mask;
+ proc_entry.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ proc_entry.mpc_reserved[0] = 0;
+ proc_entry.mpc_reserved[1] = 0;
+ proc_entry.mpc_apicver = 0x10; /* integrated APIC */
+ MP_processor_info(&proc_entry);
+ } else {
+ printk(" disabled");
+ }
+ printk("\n");
+
+ total_cpus++;
+ return;
+}
+
+static void __init
+acpi_parse_ioapic(struct acpi_table_ioapic *ioapic)
+{
+
+ if (!ioapic)
+ return;
+
+ printk(KERN_INFO
+ "IOAPIC (id[0x%x] address[0x%x] global_irq_base[0x%x])\n",
+ ioapic->id, ioapic->address, ioapic->global_irq_base);
+
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_WARNING
+ "Max # of I/O APICs (%d) exceeded (found %d).\n",
+ MAX_IO_APICS, nr_ioapics);
+/* panic("Recompile kernel with bigger MAX_IO_APICS!\n"); */
+ }
+}
+
+
+/* Interrupt source overrides inform the machine about exceptions
+ to the normal "PIC" mode interrupt routing */
+
+static void __init
+acpi_parse_int_src_ovr(struct acpi_table_int_src_ovr *intsrc)
+{
+ if (!intsrc)
+ return;
+
+ printk(KERN_INFO
+ "INT_SRC_OVR (bus[%d] irq[0x%x] global_irq[0x%x] polarity[0x%x] trigger[0x%x])\n",
+ intsrc->bus, intsrc->bus_irq, intsrc->global_irq,
+ intsrc->flags.polarity, intsrc->flags.trigger);
+}
+
+/*
+ * At this point, we look at the interrupt assignment entries in the MPS
+ * table.
+ */
+
+static void __init acpi_parse_nmi_src(struct acpi_table_nmi_src *nmisrc)
+{
+ if (!nmisrc)
+ return;
+
+ printk(KERN_INFO
+ "NMI_SRC (polarity[0x%x] trigger[0x%x] global_irq[0x%x])\n",
+ nmisrc->flags.polarity, nmisrc->flags.trigger,
+ nmisrc->global_irq);
+
+}
+static void __init
+acpi_parse_lapic_nmi(struct acpi_table_lapic_nmi *localnmi)
+{
+ if (!localnmi)
+ return;
+
+ printk(KERN_INFO
+ "LAPIC_NMI (acpi_id[0x%04x] polarity[0x%x] trigger[0x%x] lint[0x%x])\n",
+ localnmi->acpi_id, localnmi->flags.polarity,
+ localnmi->flags.trigger, localnmi->lint);
+}
+static void __init
+acpi_parse_lapic_addr_ovr(struct acpi_table_lapic_addr_ovr *lapic_addr_ovr)
+{
+ if (!lapic_addr_ovr)
+ return;
+
+ printk(KERN_INFO "LAPIC_ADDR_OVR (address[0x%lx])\n",
+ (unsigned long) lapic_addr_ovr->address);
+
+}
+
+static void __init
+acpi_parse_plat_int_src(struct acpi_table_plat_int_src *plintsrc)
+{
+ if (!plintsrc)
+ return;
+
+ printk(KERN_INFO
+ "PLAT_INT_SRC (polarity[0x%x] trigger[0x%x] type[0x%x] id[0x%04x] eid[0x%x] iosapic_vector[0x%x] global_irq[0x%x]\n",
+ plintsrc->flags.polarity, plintsrc->flags.trigger,
+ plintsrc->type, plintsrc->id, plintsrc->eid,
+ plintsrc->iosapic_vector, plintsrc->global_irq);
+}
+static int __init
+acpi_parse_madt(acpi_table_header * header, unsigned long phys)
+{
+
+ struct acpi_table_madt *madt;
+ acpi_madt_entry_header *entry_header;
+ int table_size;
+
+ madt = (struct acpi_table_madt *) __va_range(phys, header->length);
+
+ if (!madt)
+ return -EINVAL;
+
+ table_size = (int) (header->length - sizeof(*madt));
+ entry_header =
+ (acpi_madt_entry_header *) ((void *) madt + sizeof(*madt));
+
+ while (entry_header && (table_size > 0)) {
+ switch (entry_header->type) {
+ case ACPI_MADT_LAPIC:
+ acpi_parse_lapic((struct acpi_table_lapic *)
+ entry_header);
+ break;
+ case ACPI_MADT_IOAPIC:
+ acpi_parse_ioapic((struct acpi_table_ioapic *)
+ entry_header);
+ break;
+ case ACPI_MADT_INT_SRC_OVR:
+ acpi_parse_int_src_ovr((struct acpi_table_int_src_ovr *)
+ entry_header);
+ break;
+ case ACPI_MADT_NMI_SRC:
+ acpi_parse_nmi_src((struct acpi_table_nmi_src *)
+ entry_header);
+ break;
+ case ACPI_MADT_LAPIC_NMI:
+ acpi_parse_lapic_nmi((struct acpi_table_lapic_nmi *)
+ entry_header);
+ break;
+ case ACPI_MADT_LAPIC_ADDR_OVR:
+ acpi_parse_lapic_addr_ovr((struct
+ acpi_table_lapic_addr_ovr *)
+ entry_header);
+ break;
+ case ACPI_MADT_PLAT_INT_SRC:
+ acpi_parse_plat_int_src((struct acpi_table_plat_int_src
+ *) entry_header);
+ break;
+ default:
+ printk(KERN_WARNING
+ "Unsupported MADT entry type 0x%x\n",
+ entry_header->type);
+ break;
+ }
+ table_size -= entry_header->length;
+ entry_header =
+ (acpi_madt_entry_header *) ((void *) entry_header +
+ entry_header->length);
+ }
+
+ if (!total_cpus) {
+ printk("ACPI: No Processors found in the APCI table.\n");
+ return -EINVAL;
+ }
+
+ printk(KERN_INFO "%d CPUs total\n", total_cpus);
+
+ if (madt->lapic_address)
+ mp_lapic_addr = madt->lapic_address;
+ else
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ printk(KERN_INFO "Local APIC address %x\n", madt->lapic_address);
+
+ return 0;
+}
+
+extern int opt_noacpi;
+
+/*
+ * Configure the processor info using MADT in the ACPI tables. If we fail to
+ * configure that, then we use the MPS tables.
+ */
+void __init
+config_acpi_tables(void)
+{
+ memset(&acpi_boot_ops, 0, sizeof(acpi_boot_ops));
+ acpi_boot_ops[ACPI_APIC] = acpi_parse_madt;
+
+ if (!opt_noacpi && !acpi_tables_init()) {
+ have_acpi_tables = 1;
+ printk("Enabling the CPU's according to the ACPI table\n");
+ }
+}
diff --git a/xen/arch/i386/acpitable.h b/xen/arch/i386/acpitable.h
new file mode 100644
index 0000000000..ddf1c84a65
--- /dev/null
+++ b/xen/arch/i386/acpitable.h
@@ -0,0 +1,260 @@
+/*
+ * acpitable.c - IA32-specific ACPI boot-time initialization (Revision: 1)
+ *
+ * Copyright (C) 1999 Andrew Henroid
+ * Copyright (C) 2001 Richard Schaal
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2001 Arjan van de Ven <arjanv@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * $Id: acpitable.h,v 1.3 2001/11/03 22:41:34 fenrus Exp $
+ */
+
+/*
+ * The following code is cut-and-pasted from drivers/acpi. Some of the code
+ * there may not be updated or delivered yet.
+ * To avoid conflicts when CONFIG_ACPI is defined, the following codes are
+ * modified so that they are self-contained in this file.
+ * -- jun
+ */
+
+#ifndef _HEADER_ACPITABLE_H_
+#define _HEADER_ACPITABLE_H_
+
+#define dprintk printk
+typedef unsigned int ACPI_TBLPTR;
+
+typedef struct { /* ACPI common table header */
+ char signature[4]; /* identifies type of table */
+ u32 length; /* length of table,
+ in bytes, including header */
+ u8 revision; /* specification minor version # */
+ u8 checksum; /* to make sum of entire table == 0 */
+ char oem_id[6]; /* OEM identification */
+ char oem_table_id[8]; /* OEM table identification */
+ u32 oem_revision; /* OEM revision number */
+ char asl_compiler_id[4]; /* ASL compiler vendor ID */
+ u32 asl_compiler_revision; /* ASL compiler revision number */
+} acpi_table_header __attribute__ ((packed));
+
+enum {
+ ACPI_APIC = 0,
+ ACPI_BOOT,
+ ACPI_DBGP,
+ ACPI_DSDT,
+ ACPI_ECDT,
+ ACPI_ETDT,
+ ACPI_FACP,
+ ACPI_FACS,
+ ACPI_OEMX,
+ ACPI_PSDT,
+ ACPI_SBST,
+ ACPI_SLIT,
+ ACPI_SPCR,
+ ACPI_SRAT,
+ ACPI_SSDT,
+ ACPI_SPMI,
+ ACPI_XSDT,
+ ACPI_TABLE_COUNT
+};
+
+static char *acpi_table_signatures[ACPI_TABLE_COUNT] = {
+ "APIC",
+ "BOOT",
+ "DBGP",
+ "DSDT",
+ "ECDT",
+ "ETDT",
+ "FACP",
+ "FACS",
+ "OEM",
+ "PSDT",
+ "SBST",
+ "SLIT",
+ "SPCR",
+ "SRAT",
+ "SSDT",
+ "SPMI",
+ "XSDT"
+};
+
+struct acpi_table_madt {
+ acpi_table_header header;
+ u32 lapic_address;
+ struct {
+ u32 pcat_compat:1;
+ u32 reserved:31;
+ } flags __attribute__ ((packed));
+} __attribute__ ((packed));
+
+enum {
+ ACPI_MADT_LAPIC = 0,
+ ACPI_MADT_IOAPIC,
+ ACPI_MADT_INT_SRC_OVR,
+ ACPI_MADT_NMI_SRC,
+ ACPI_MADT_LAPIC_NMI,
+ ACPI_MADT_LAPIC_ADDR_OVR,
+ ACPI_MADT_IOSAPIC,
+ ACPI_MADT_LSAPIC,
+ ACPI_MADT_PLAT_INT_SRC,
+ ACPI_MADT_ENTRY_COUNT
+};
+
+#define RSDP_SIG "RSD PTR "
+#define RSDT_SIG "RSDT"
+
+#define ACPI_DEBUG_PRINT(pl)
+
+#define ACPI_MEMORY_MODE 0x01
+#define ACPI_LOGICAL_ADDRESSING 0x00
+#define ACPI_PHYSICAL_ADDRESSING 0x01
+
+#define LO_RSDP_WINDOW_BASE 0 /* Physical Address */
+#define HI_RSDP_WINDOW_BASE 0xE0000 /* Physical Address */
+#define LO_RSDP_WINDOW_SIZE 0x400
+#define HI_RSDP_WINDOW_SIZE 0x20000
+#define RSDP_SCAN_STEP 16
+#define RSDP_CHECKSUM_LENGTH 20
+
+typedef int (*acpi_table_handler) (acpi_table_header * header, unsigned long);
+
+struct acpi_table_rsdp {
+ char signature[8];
+ u8 checksum;
+ char oem_id[6];
+ u8 revision;
+ u32 rsdt_address;
+} __attribute__ ((packed));
+
+struct acpi_table_rsdt {
+ acpi_table_header header;
+ u32 entry[ACPI_TABLE_COUNT];
+} __attribute__ ((packed));
+
+typedef struct {
+ u8 type;
+ u8 length;
+} acpi_madt_entry_header __attribute__ ((packed));
+
+typedef struct {
+ u16 polarity:2;
+ u16 trigger:2;
+ u16 reserved:12;
+} acpi_madt_int_flags __attribute__ ((packed));
+
+struct acpi_table_lapic {
+ acpi_madt_entry_header header;
+ u8 acpi_id;
+ u8 id;
+ struct {
+ u32 enabled:1;
+ u32 reserved:31;
+ } flags __attribute__ ((packed));
+} __attribute__ ((packed));
+
+struct acpi_table_ioapic {
+ acpi_madt_entry_header header;
+ u8 id;
+ u8 reserved;
+ u32 address;
+ u32 global_irq_base;
+} __attribute__ ((packed));
+
+struct acpi_table_int_src_ovr {
+ acpi_madt_entry_header header;
+ u8 bus;
+ u8 bus_irq;
+ u32 global_irq;
+ acpi_madt_int_flags flags;
+} __attribute__ ((packed));
+
+struct acpi_table_nmi_src {
+ acpi_madt_entry_header header;
+ acpi_madt_int_flags flags;
+ u32 global_irq;
+} __attribute__ ((packed));
+
+struct acpi_table_lapic_nmi {
+ acpi_madt_entry_header header;
+ u8 acpi_id;
+ acpi_madt_int_flags flags;
+ u8 lint;
+} __attribute__ ((packed));
+
+struct acpi_table_lapic_addr_ovr {
+ acpi_madt_entry_header header;
+ u8 reserved[2];
+ u64 address;
+} __attribute__ ((packed));
+
+struct acpi_table_iosapic {
+ acpi_madt_entry_header header;
+ u8 id;
+ u8 reserved;
+ u32 global_irq_base;
+ u64 address;
+} __attribute__ ((packed));
+
+struct acpi_table_lsapic {
+ acpi_madt_entry_header header;
+ u8 acpi_id;
+ u8 id;
+ u8 eid;
+ u8 reserved[3];
+ struct {
+ u32 enabled:1;
+ u32 reserved:31;
+ } flags;
+} __attribute__ ((packed));
+
+struct acpi_table_plat_int_src {
+ acpi_madt_entry_header header;
+ acpi_madt_int_flags flags;
+ u8 type;
+ u8 id;
+ u8 eid;
+ u8 iosapic_vector;
+ u32 global_irq;
+ u32 reserved;
+} __attribute__ ((packed));
+
+/*
+ * ACPI Table Descriptor. One per ACPI table
+ */
+typedef struct acpi_table_desc {
+ struct acpi_table_desc *prev;
+ struct acpi_table_desc *next;
+ struct acpi_table_desc *installed_desc;
+ acpi_table_header *pointer;
+ void *base_pointer;
+ u8 *aml_pointer;
+ u64 physical_address;
+ u32 aml_length;
+ u32 length;
+ u32 count;
+ u16 table_id;
+ u8 type;
+ u8 allocation;
+ u8 loaded_into_namespace;
+
+} acpi_table_desc __attribute__ ((packed));
+
+#endif
diff --git a/xen/arch/i386/apic.c b/xen/arch/i386/apic.c
new file mode 100644
index 0000000000..9b999df951
--- /dev/null
+++ b/xen/arch/i386/apic.c
@@ -0,0 +1,845 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: apic.c
+ * Author:
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: programmable APIC timer interface for accurate timers
+ * modified version of Linux' apic.c
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+/*
+ * Local APIC handling, local APIC timers
+ *
+ * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively.
+ */
+
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/sched.h>
+#include <xeno/irq.h>
+#include <xeno/delay.h>
+#include <asm/mc146818rtc.h>
+#include <asm/msr.h>
+#include <xeno/errno.h>
+#include <asm/atomic.h>
+#include <xeno/smp.h>
+#include <xeno/interrupt.h>
+#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+
+#include <xeno/ac_timer.h>
+
+#undef APIC_TIME_TRACE
+#ifdef APIC_TIME_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+
+/* Using APIC to generate smp_local_timer_interrupt? */
+int using_apic_timer = 0;
+
+int get_maxlvt(void)
+{
+ unsigned int v, ver, maxlvt;
+
+ v = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(v);
+ /* 82489DXs do not report # of LVT entries. */
+ maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
+ return maxlvt;
+}
+
+static void clear_local_APIC(void)
+{
+ int maxlvt;
+ unsigned long v;
+
+ maxlvt = get_maxlvt();
+
+ /*
+ * Careful: we have to set masks only first to deassert
+ * any level-triggered sources.
+ */
+ v = apic_read(APIC_LVTT);
+ apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT1);
+ apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
+ if (maxlvt >= 3) {
+ v = apic_read(APIC_LVTERR);
+ apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
+ }
+ if (maxlvt >= 4) {
+ v = apic_read(APIC_LVTPC);
+ apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
+ }
+
+ /*
+ * Clean APIC state for other OSs:
+ */
+ apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+ apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
+ if (maxlvt >= 3)
+ apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
+ if (maxlvt >= 4)
+ apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
+}
+
+void __init connect_bsp_APIC(void)
+{
+ if (pic_mode) {
+ /*
+ * Do not trust the local APIC being empty at bootup.
+ */
+ clear_local_APIC();
+ /*
+ * PIC mode, enable APIC mode in the IMCR, i.e.
+ * connect BSP's local APIC to INT and NMI lines.
+ */
+ printk("leaving PIC mode, enabling APIC mode.\n");
+ outb(0x70, 0x22);
+ outb(0x01, 0x23);
+ }
+}
+
+void disconnect_bsp_APIC(void)
+{
+ if (pic_mode) {
+ /*
+ * Put the board back into PIC mode (has an effect
+ * only on certain older boards). Note that APIC
+ * interrupts, including IPIs, won't work beyond
+ * this point! The only exception are INIT IPIs.
+ */
+ printk("disabling APIC mode, entering PIC mode.\n");
+ outb(0x70, 0x22);
+ outb(0x00, 0x23);
+ }
+}
+
+void disable_local_APIC(void)
+{
+ unsigned long value;
+
+ clear_local_APIC();
+
+ /*
+ * Disable APIC (implies clearing of registers
+ * for 82489DX!).
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_SPIV_APIC_ENABLED;
+ apic_write_around(APIC_SPIV, value);
+}
+
+/*
+ * This is to verify that we're looking at a real local APIC.
+ * Check these against your board if the CPUs aren't getting
+ * started for no apparent reason.
+ */
+int __init verify_local_APIC(void)
+{
+ unsigned int reg0, reg1;
+
+ /*
+ * The version register is read-only in a real APIC.
+ */
+ reg0 = apic_read(APIC_LVR);
+ Dprintk("Getting VERSION: %x\n", reg0);
+ apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+ reg1 = apic_read(APIC_LVR);
+ Dprintk("Getting VERSION: %x\n", reg1);
+
+ /*
+ * The two version reads above should print the same
+ * numbers. If the second one is different, then we
+ * poke at a non-APIC.
+ */
+ if (reg1 != reg0)
+ return 0;
+
+ /*
+ * Check if the version looks reasonable.
+ */
+ reg1 = GET_APIC_VERSION(reg0);
+ if (reg1 == 0x00 || reg1 == 0xff)
+ return 0;
+ reg1 = get_maxlvt();
+ if (reg1 < 0x02 || reg1 == 0xff)
+ return 0;
+
+ /*
+ * The ID register is read/write in a real APIC.
+ */
+ reg0 = apic_read(APIC_ID);
+ Dprintk("Getting ID: %x\n", reg0);
+ apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
+ reg1 = apic_read(APIC_ID);
+ Dprintk("Getting ID: %x\n", reg1);
+ apic_write(APIC_ID, reg0);
+ if (reg1 != (reg0 ^ APIC_ID_MASK))
+ return 0;
+
+ /*
+ * The next two are just to see if we have sane values.
+ * They're only really relevant if we're in Virtual Wire
+ * compatibility mode, but most boxes aren't anymore.
+ */
+ reg0 = apic_read(APIC_LVT0);
+ Dprintk("Getting LVT0: %x\n", reg0);
+ reg1 = apic_read(APIC_LVT1);
+ Dprintk("Getting LVT1: %x\n", reg1);
+
+ return 1;
+}
+
+void __init sync_Arb_IDs(void)
+{
+ /* Wait for idle. */
+ apic_wait_icr_idle();
+
+ Dprintk("Synchronizing Arb IDs.\n");
+ apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
+ | APIC_DM_INIT);
+}
+
+extern void __error_in_apic_c (void);
+
+/*
+ * WAS: An initial setup of the virtual wire mode.
+ * NOW: We don't bother doing anything. All we need at this point
+ * is to receive timer ticks, so that 'jiffies' is incremented.
+ * If we're SMP, then we can assume BIOS did setup for us.
+ * If we're UP, then the APIC should be disabled (it is at reset).
+ * If we're UP and APIC is enabled, then BIOS is clever and has
+ * probably done initial interrupt routing for us.
+ */
+void __init init_bsp_APIC(void)
+{
+}
+
+void __init setup_local_APIC (void)
+{
+ unsigned long value, ver, maxlvt;
+
+ value = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(value);
+
+ if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
+ __error_in_apic_c();
+
+ /* Double-check whether this APIC is really registered. */
+ if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
+ BUG();
+
+ /*
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+
+ /*
+ * In clustered apic mode, the firmware does this for us.
+ * Put the APIC into flat delivery mode.
+ * Must be "all ones" explicitly for 82489DX.
+ */
+ apic_write_around(APIC_DFR, 0xffffffff);
+
+ /*
+ * Set up the logical destination ID.
+ */
+ value = apic_read(APIC_LDR);
+ value &= ~APIC_LDR_MASK;
+ value |= (1<<(smp_processor_id()+24));
+ apic_write_around(APIC_LDR, value);
+
+ /*
+ * Set Task Priority to 'accept all'. We never change this
+ * later on.
+ */
+ value = apic_read(APIC_TASKPRI);
+ value &= ~APIC_TPRI_MASK;
+ apic_write_around(APIC_TASKPRI, value);
+
+ /*
+ * Now that we are all set up, enable the APIC
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ /*
+ * Enable APIC
+ */
+ value |= APIC_SPIV_APIC_ENABLED;
+
+ /* Enable focus processor (bit==0) */
+ value &= ~APIC_SPIV_FOCUS_DISABLED;
+
+ /* Set spurious IRQ vector */
+ value |= SPURIOUS_APIC_VECTOR;
+ apic_write_around(APIC_SPIV, value);
+
+ /*
+ * Set up LVT0, LVT1:
+ *
+ * set up through-local-APIC on the BP's LINT0. This is not
+ * strictly necessary in pure symmetric-IO mode, but sometimes
+ * we delegate interrupts to the 8259A.
+ */
+ /*
+ * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+ */
+ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+ if (!smp_processor_id()) {
+ value = APIC_DM_EXTINT;
+ printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
+ } else {
+ value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+ printk("masked ExtINT on CPU#%d\n", smp_processor_id());
+ }
+ apic_write_around(APIC_LVT0, value);
+
+ /*
+ * only the BP should see the LINT1 NMI signal, obviously.
+ */
+ if (!smp_processor_id())
+ value = APIC_DM_NMI;
+ else
+ value = APIC_DM_NMI | APIC_LVT_MASKED;
+ if (!APIC_INTEGRATED(ver)) /* 82489DX */
+ value |= APIC_LVT_LEVEL_TRIGGER;
+ apic_write_around(APIC_LVT1, value);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ maxlvt = get_maxlvt();
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ value = apic_read(APIC_ESR);
+ printk("ESR value before enabling vector: %08lx\n", value);
+
+ value = ERROR_APIC_VECTOR; /* enables sending errors */
+ apic_write_around(APIC_LVTERR, value);
+ /* spec says clear errors after enabling vector. */
+ if (maxlvt > 3)
+ apic_write(APIC_ESR, 0);
+ value = apic_read(APIC_ESR);
+ printk("ESR value after enabling vector: %08lx\n", value);
+ } else {
+ printk("No ESR for 82489DX.\n");
+ }
+}
+
+
+static inline void apic_pm_init1(void) { }
+static inline void apic_pm_init2(void) { }
+
+
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ */
+
+static int __init detect_init_APIC (void)
+{
+ u32 h, l, features;
+ extern void get_cpu_vendor(struct cpuinfo_x86*);
+
+ /* Workaround for us being called before identify_cpu(). */
+ get_cpu_vendor(&boot_cpu_data);
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1)
+ break;
+ goto no_apic;
+ case X86_VENDOR_INTEL:
+ if (boot_cpu_data.x86 == 6 ||
+ (boot_cpu_data.x86 == 15 && cpu_has_apic) ||
+ (boot_cpu_data.x86 == 5 && cpu_has_apic))
+ break;
+ goto no_apic;
+ default:
+ goto no_apic;
+ }
+
+ if (!cpu_has_apic) {
+ /*
+ * Some BIOSes disable the local APIC in the
+ * APIC_BASE MSR. This can only be done in
+ * software for Intel P6 and AMD K7 (Model > 1).
+ */
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+ printk("Local APIC disabled by BIOS -- reenabling.\n");
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ }
+ }
+
+ /* The APIC feature bit should now be enabled in `cpuid' */
+ features = cpuid_edx(1);
+ if (!(features & (1 << X86_FEATURE_APIC))) {
+ printk("Could not enable APIC!\n");
+ return -1;
+ }
+
+ set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability);
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+ boot_cpu_physical_apicid = 0;
+
+ printk("Found and enabled local APIC!\n");
+ apic_pm_init1();
+ return 0;
+
+ no_apic:
+ printk("No local APIC present or hardware disabled\n");
+ return -1;
+}
+
+void __init init_apic_mappings(void)
+{
+ unsigned long apic_phys = 0;
+
+ /*
+ * If no local APIC can be found then set up a fake all zeroes page to
+ * simulate the local APIC and another one for the IO-APIC.
+ */
+ if (!smp_found_config && detect_init_APIC()) {
+ apic_phys = get_free_page(GFP_KERNEL);
+ apic_phys = __pa(apic_phys);
+ } else
+ apic_phys = mp_lapic_addr;
+
+ set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+ Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
+
+ /*
+ * Fetch the APIC ID of the BSP in case we have a
+ * default configuration (or the MP table is broken).
+ */
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+#ifdef CONFIG_X86_IO_APIC
+ {
+ unsigned long ioapic_phys = 0, idx = FIX_IO_APIC_BASE_0;
+ int i;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ if (smp_found_config)
+ ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+ set_fixmap_nocache(idx, ioapic_phys);
+ Dprintk("mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx), ioapic_phys);
+ idx++;
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ * APIC calibration
+ *
+ * The APIC is programmed in bus cycles.
+ * Timeout values should be specified in real time units.
+ * The "cheapest" time source is the cyclecounter.
+ *
+ * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
+ *
+ * The calibration is currently a bit shoddy since it requires the external
+ * timer chip to generate periodic timer interrupts.
+ *****************************************************************************/
+
+/* used for system time scaling */
+static unsigned int bus_freq;
+static u32 bus_cycle; /* length of one bus cycle in pico-seconds */
+static u32 bus_scale; /* scaling factor to convert ns to bus cycles */
+u64 cpu_freq;
+
+/*
+ * The timer chip is already set up at HZ interrupts per second here,
+ * but we do not accept timer interrupts yet. We only allow the BP
+ * to calibrate.
+ */
+static unsigned int __init get_8254_timer_count(void)
+{
+ /*extern spinlock_t i8253_lock;*/
+ /*unsigned long flags;*/
+ unsigned int count;
+ /*spin_lock_irqsave(&i8253_lock, flags);*/
+ outb_p(0x00, 0x43);
+ count = inb_p(0x40);
+ count |= inb_p(0x40) << 8;
+ /*spin_unlock_irqrestore(&i8253_lock, flags);*/
+ return count;
+}
+
+void __init wait_8254_wraparound(void)
+{
+ unsigned int curr_count, prev_count=~0;
+ int delta;
+ curr_count = get_8254_timer_count();
+ do {
+ prev_count = curr_count;
+ curr_count = get_8254_timer_count();
+ delta = curr_count-prev_count;
+ /*
+ * This limit for delta seems arbitrary, but it isn't, it's slightly
+ * above the level of error a buggy Mercury/Neptune chipset timer can
+ * cause.
+ */
+ } while (delta < 300);
+}
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function with a very large value and read the current time after
+ * a well defined period of time as expired.
+ *
+ * Calibration is only performed once, for CPU0!
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+#define APIC_DIVISOR 1
+static void __setup_APIC_LVTT(unsigned int clocks)
+{
+ unsigned int lvtt1_value, tmp_value;
+ lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR;
+ apic_write_around(APIC_LVTT, lvtt1_value);
+ tmp_value = apic_read(APIC_TDCR);
+ apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1));
+ apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+}
+
+/*
+ * This is done for every CPU from setup_APIC_clocks() below.
+ * We set up each local APIC with a zero timeout value for now.
+ * Unlike Linux, we don't have to wait for slices etc.
+ */
+void setup_APIC_timer(void * data)
+{
+ unsigned long flags;
+ __save_flags(flags);
+ __sti();
+ printk("cpu: %d: setup timer.", smp_processor_id());
+ __setup_APIC_LVTT(0);
+ printk("done\n");
+ __restore_flags(flags);
+}
+
+/*
+ * In this function we calibrate APIC bus clocks to the external timer.
+ *
+ * As a result we have the bus speed and CPU speed in Hz.
+ *
+ * We want to do the calibration only once (for CPU0). CPUs connected by the
+ * same APIC bus have the very same bus frequency.
+ *
+ * This is a bit shoddy, since we use the very same periodic timer interrupt
+ * that we are trying to eliminate in order to calibrate the APIC.
+ */
+
+int __init calibrate_APIC_clock(void)
+{
+ unsigned long long t1 = 0, t2 = 0;
+ long tt1, tt2;
+ long result;
+ int i;
+ const int LOOPS = HZ/10;
+
+ printk("calibrating APIC timer for CPU%d...\n", smp_processor_id());
+
+ /* Put whatever arbitrary (but long enough) timeout
+ * value into the APIC clock, we just want to get the
+ * counter running for calibration. */
+ __setup_APIC_LVTT(1000000000);
+
+ /* The timer chip counts down to zero. Let's wait
+ * for a wraparound to start exact measurement:
+ * (the current tick might have been already half done) */
+ wait_8254_wraparound();
+
+ /* We wrapped around just now. Let's start: */
+ rdtscll(t1);
+ tt1 = apic_read(APIC_TMCCT);
+
+ /* Let's wait LOOPS wraparounds: */
+ for (i = 0; i < LOOPS; i++)
+ wait_8254_wraparound();
+
+ tt2 = apic_read(APIC_TMCCT);
+ rdtscll(t2);
+
+ /* The APIC bus clock counter is 32 bits only; it might have
+ * wrapped (underflowed, to be exact, as the timer counts down),
+ * but since we use signed longs no extra care is needed. */
+ result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
+
+ printk("..... CPU speed is %ld.%04ld MHz.\n",
+ ((long)(t2-t1)/LOOPS)/(1000000/HZ),
+ ((long)(t2-t1)/LOOPS)%(1000000/HZ));
+
+ printk("..... Bus speed is %ld.%04ld MHz.\n",
+ result/(1000000/HZ),
+ result%(1000000/HZ));
+
+ cpu_freq = (u64)(((t2-t1)/LOOPS)*HZ);
+
+ /* set up multipliers for accurate timer code */
+ bus_freq = result*HZ;
+ bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */
+ bus_scale = (1000*262144)/bus_cycle;
+
+ /* print results */
+ printk("..... bus_freq = %u Hz\n", bus_freq);
+ printk("..... bus_cycle = %u ps\n", bus_cycle);
+ printk("..... bus_scale = %u \n", bus_scale);
+ /* reset APIC to zero timeout value */
+ __setup_APIC_LVTT(0);
+ return result;
+}
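+
+/*
+ * To sanity-check the arithmetic above (a sketch, assuming HZ=100): each
+ * 8254 wraparound is one jiffy (1/HZ s), so LOOPS = HZ/10 wraparounds span
+ * exactly 0.1s. (tt1-tt2)/LOOPS is then APIC-bus ticks per jiffy and
+ * (t2-t1)/LOOPS TSC ticks per jiffy; multiplying each by HZ gives bus_freq
+ * and cpu_freq in Hz. Dividing ticks-per-jiffy by (1000000/HZ) yields the
+ * integer MHz part printed above, e.g. 660000/10000 = 66 for a 66MHz bus.
+ */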
+
+/*
+ * Initialise the APIC timers for all CPUs.
+ * We start with the first CPU and find out the processor frequency and bus speed.
+ */
+void __init setup_APIC_clocks (void)
+{
+ printk("Using local APIC timer interrupts.\n");
+ using_apic_timer = 1;
+ __cli();
+ /* calibrate CPU0 for CPU speed and BUS speed */
+ bus_freq = calibrate_APIC_clock();
+ /* Now set up the timer for real. */
+ setup_APIC_timer((void *)bus_freq);
+ __sti();
+ /* and update all other cpus */
+ smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
+}
+
+#undef APIC_DIVISOR
+
+/*
+ * Reprogram the APIC timer. The timeout value is in ns from start of boot.
+ * Returns 1 on success;
+ * returns 0 if the timeout value is too small or in the past.
+ */
+int reprogram_ac_timer(s_time_t timeout)
+{
+ int cpu = smp_processor_id();
+ s_time_t now;
+ s_time_t expire;
+ u64 apic_tmict;
+
+ now = NOW();
+ expire = timeout - now; /* value from now */
+
+ if (expire <= 0) {
+ printk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n",
+ cpu, (u32)(now>>32), (u32)now, (u32)(timeout>>32),(u32)timeout);
+ return 0; /* timeout value in the past */
+ }
+
+ /* conversion to bus units */
+ apic_tmict = (((u64)bus_scale) * expire)>>18;
+
+ if (apic_tmict >= 0xffffffff) {
+ printk("APICT[%02d] Timeout value too large\n", cpu);
+ apic_tmict = 0xffffffff;
+ }
+ if (apic_tmict == 0) {
+ printk("APICT[%02d] timeout value too small\n", cpu);
+ return 0;
+ }
+
+ /* program the timer */
+ apic_write(APIC_TMICT, (unsigned long)apic_tmict);
+
+ TRC(printk("APICT[%02d] reprog(): expire=%lld %u\n",
+ cpu, expire, apic_tmict));
+ return 1;
+}
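+
+/*
+ * Hypothetical usage from the accurate-timer code: a call such as
+ * reprogram_ac_timer(NOW() + MILLISECS(1)) arms the local APIC timer to
+ * fire in 1ms; a return of 0 means the deadline has already passed and
+ * the caller should process its timer queue immediately instead.
+ */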
+
+/*
+ * Local timer interrupt handler.
+ * Here the programmable, accurate timers are executed.
+ * If we are on CPU0 and we should have updated jiffies, we do this
+ * as well and deal with traditional Linux timers. Note that if the
+ * timer APIC on a CPU does not go off every 10ms or so, the Linux
+ * timers lose accuracy, but that shouldn't be a problem.
+ */
+static s_time_t last_cpu0_tirq = 0;
+inline void smp_local_timer_interrupt(struct pt_regs * regs)
+{
+ int cpu = smp_processor_id();
+ s_time_t diff, now;
+
+ /* if CPU 0 do old timer stuff */
+ if (cpu == 0)
+ {
+ now = NOW();
+ diff = now - last_cpu0_tirq;
+
+ if (diff <= 0) {
+ printk ("System Time went backwards: %lld\n", diff);
+ return;
+ }
+
+ while (diff >= MILLISECS(10)) {
+ do_timer(regs);
+ diff -= MILLISECS(10);
+ last_cpu0_tirq += MILLISECS(10);
+ }
+ }
+ /* call accurate timer function */
+ do_ac_timer();
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ * interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+unsigned int apic_timer_irqs [NR_CPUS];
+
+void smp_apic_timer_interrupt(struct pt_regs * regs)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+ apic_timer_irqs[cpu]++;
+
+ /*
+ * NOTE! We'd better ACK the irq immediately, because timer handling can
+ * be slow. XXX is this safe?
+ */
+ ack_APIC_irq();
+
+ /* call the local handler */
+ irq_enter(cpu, 0);
+ smp_local_timer_interrupt(regs);
+ irq_exit(cpu, 0);
+
+ if (softirq_pending(cpu))
+ do_softirq();
+}
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_spurious_interrupt(void)
+{
+ unsigned long v;
+
+ /*
+ * Check if this really is a spurious interrupt and ACK it
+ * if it is a vectored one. Just in case...
+ * Spurious interrupts should not be ACKed.
+ */
+ v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+ if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+ ack_APIC_irq();
+
+ /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+ printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
+ smp_processor_id());
+}
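+
+/*
+ * On the ISR indexing above: the 256-bit in-service register is spread
+ * across eight 32-bit APIC registers at 0x10-byte strides, so for a
+ * vector v the relevant word lives at APIC_ISR + (v/32)*0x10, which is
+ * exactly ((v & ~0x1f) >> 1), and the bit within it is (v & 0x1f).
+ */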
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+
+asmlinkage void smp_error_interrupt(void)
+{
+ unsigned long v, v1;
+
+ /* First tickle the hardware, only then report what went on. -- REW */
+ v = apic_read(APIC_ESR);
+ apic_write(APIC_ESR, 0);
+ v1 = apic_read(APIC_ESR);
+ ack_APIC_irq();
+ atomic_inc(&irq_err_count);
+
+ /* Here is what the APIC error bits mean:
+ 0: Send CS error
+ 1: Receive CS error
+ 2: Send accept error
+ 3: Receive accept error
+ 4: Reserved
+ 5: Send illegal vector
+ 6: Received illegal vector
+ 7: Illegal register address
+ */
+ printk ("APIC error on CPU%d: %02lx(%02lx)\n",
+ smp_processor_id(), v, v1);
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+ if (!smp_found_config && !cpu_has_apic)
+ return -1;
+
+ /*
+ * Complain if the BIOS pretends there is one.
+ */
+ if (!cpu_has_apic&&APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]))
+ {
+ printk("BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ return -1;
+ }
+
+ verify_local_APIC();
+
+ connect_bsp_APIC();
+
+ phys_cpu_present_map = 1;
+ apic_write_around(APIC_ID, boot_cpu_physical_apicid);
+
+ apic_pm_init2();
+
+ setup_local_APIC();
+
+#ifdef CONFIG_X86_IO_APIC
+ if (smp_found_config && nr_ioapics)
+ setup_IO_APIC();
+#endif
+ setup_APIC_clocks();
+
+ return 0;
+}
diff --git a/xen/arch/i386/boot/boot.S b/xen/arch/i386/boot/boot.S
new file mode 100644
index 0000000000..091b760576
--- /dev/null
+++ b/xen/arch/i386/boot/boot.S
@@ -0,0 +1,239 @@
+#include <xeno/config.h>
+#include <asm/page.h>
+
+#define SECONDARY_CPU_FLAG 0xA5A5A5A5
+
+ .text
+
+ENTRY(start)
+ jmp hal_entry
+
+ .align 4
+
+/*** MULTIBOOT HEADER ***/
+ /* Magic number indicating a Multiboot header. */
+ .long 0x1BADB002
+ /* Flags to bootloader (see Multiboot spec). */
+ .long 0x00000006
+ /* Checksum: must be the negated sum of the first two fields. */
+ .long -0x1BADB008
+ /* Unused loader addresses (ELF header has all this already).*/
+ .long 0,0,0,0,0
+ /* EGA text mode. */
+ .long 1,0,0,0
+
+hal_entry:
+ /* Set up a few descriptors: on entry only CS is guaranteed good. */
+ lgdt %cs:nopaging_gdt_descr-__PAGE_OFFSET
+ mov $(__HYPERVISOR_DS),%ecx
+ mov %ecx,%ds
+ mov %ecx,%es
+ ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
+1: lss stack_start-__PAGE_OFFSET,%esp
+
+ /* Reset EFLAGS (subsumes CLI and CLD). */
+ pushl $0
+ popf
+
+ /* CPU type checks. We need P6+. */
+ mov $0x200000,%edx
+ pushfl
+ pop %ecx
+ and %edx,%ecx
+ jne bad_cpu # ID bit should be clear
+ pushl %edx
+ popfl
+ pushfl
+ pop %ecx
+ and %edx,%ecx
+ je bad_cpu # ID bit should be set
+
+ /* Set up CR0. */
+ mov %cr0,%ecx
+ and $0x00000011,%ecx # save ET and PE
+ or $0x00050022,%ecx # set AM, WP, NE and MP
+ mov %ecx,%cr0
+
+ /* Set up FPU. */
+ fninit
+
+ /* Set up CR4, except global flag which Intel requires should be */
+ /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
+ mov %cr4,%ecx
+ or mmu_cr4_features-__PAGE_OFFSET,%ecx
+ mov %ecx,mmu_cr4_features-__PAGE_OFFSET
+ and $0x7f,%ecx /* disable GLOBAL bit */
+ mov %ecx,%cr4
+
+ /* Is this a non-boot processor? */
+ cmp $(SECONDARY_CPU_FLAG),%ebx
+ jne continue_boot_cpu
+
+ call start_paging
+ lidt idt_descr
+ jmp initialize_secondary
+
+continue_boot_cpu:
+ add $__PAGE_OFFSET,%ebx
+ push %ebx /* Multiboot info struct */
+ push %eax /* Multiboot magic value */
+
+ /* Initialize BSS (no nasty surprises!) */
+ mov $__bss_start-__PAGE_OFFSET,%edi
+ mov $_end-__PAGE_OFFSET,%ecx
+ sub %edi,%ecx
+ xor %eax,%eax
+ rep stosb
+
+ /* Initialize low and high mappings of all memory with 4MB pages */
+ mov $idle0_pg_table-__PAGE_OFFSET,%edi
+ mov $0x1e3,%eax /* PRESENT+RW+A+D+4MB+GLOBAL */
+1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
+ stosl /* low mapping */
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $MAX_DIRECTMAP_ADDRESS+0x1e3,%eax
+ jne 1b
+
+ call start_paging
+ call setup_idt
+ lidt idt_descr
+
+ /* Call into main C routine. This should never return.*/
+ call cmain
+ ud2 /* Force a panic (invalid opcode). */
+
+start_paging:
+ mov $idle0_pg_table-__PAGE_OFFSET,%eax
+ mov %eax,%cr3
+ mov %cr0,%eax
+ or $0x80010000,%eax /* set PG and WP bits */
+ mov %eax,%cr0
+ jmp 1f
+1: /* Install relocated selectors (FS/GS unused). */
+ lgdt gdt_descr
+ mov $(__HYPERVISOR_DS),%ecx
+ mov %ecx,%ds
+ mov %ecx,%es
+ mov %ecx,%ss
+ ljmp $(__HYPERVISOR_CS),$1f
+1: /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
+ movl mmu_cr4_features,%ecx
+ movl %ecx,%cr4
+ /* Relocate ESP */
+ add $__PAGE_OFFSET,%esp
+ /* Relocate EIP via return jump */
+ pop %ecx
+ add $__PAGE_OFFSET,%ecx
+ jmp *%ecx
+
+
+/*** INTERRUPT INITIALISATION ***/
+
+setup_idt:
+ lea ignore_int,%edx
+ mov $(__HYPERVISOR_CS << 16),%eax
+ mov %dx,%ax /* selector = 0x0010 = cs */
+ mov $0x8E00,%dx /* interrupt gate - dpl=0, present */
+
+ lea SYMBOL_NAME(idt_table),%edi
+ mov $256,%ecx
+1: mov %eax,(%edi)
+ mov %edx,4(%edi)
+ add $8,%edi
+ loop 1b
+ ret
+
+/* This is the default interrupt handler. */
+int_msg:
+ .asciz "Unknown interrupt\n"
+ ALIGN
+ignore_int:
+ cld
+ push %eax
+ push %ecx
+ push %edx
+ pushl %es
+ pushl %ds
+ mov $(__HYPERVISOR_DS),%eax
+ mov %eax,%ds
+ mov %eax,%es
+ pushl $int_msg
+ call SYMBOL_NAME(printf)
+1: jmp 1b
+ pop %eax
+ popl %ds
+ popl %es
+ pop %edx
+ pop %ecx
+ pop %eax
+ iret
+
+
+bad_cpu_msg:
+ .asciz "Bad CPU type. Need P6+."
+ ALIGN
+bad_cpu:
+ call init_serial
+ mov $bad_cpu_msg,%esi
+1: lodsb
+ test %al,%al
+ je 1f
+ push %eax
+ call putchar_serial
+ add $4,%esp
+ jmp 1b
+1: jmp 1b
+
+
+/*** STACK LOCATION ***/
+
+ENTRY(stack_start)
+ .long SYMBOL_NAME(idle0_task_union)+8192-__PAGE_OFFSET
+ .long __HYPERVISOR_DS
+
+/*** DESCRIPTOR TABLES ***/
+
+.globl SYMBOL_NAME(idt)
+.globl SYMBOL_NAME(gdt)
+
+ ALIGN
+
+ .word 0
+idt_descr:
+ .word 256*8-1
+SYMBOL_NAME(idt):
+ .long SYMBOL_NAME(idt_table)
+
+ .word 0
+gdt_descr:
+ .word 256*8-1
+SYMBOL_NAME(gdt):
+ .long SYMBOL_NAME(gdt_table) /* gdt base */
+
+ .word 0
+nopaging_gdt_descr:
+ .word 256*8-1
+ .long SYMBOL_NAME(gdt_table)-__PAGE_OFFSET
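+
+# NB. Each pseudo-descriptor above is a 16-bit limit followed by a 32-bit
+# base; the ".word 0" padding before each keeps the base field 4-byte
+# aligned so the processor can fetch the lgdt/lidt operand efficiently.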
+
+ ALIGN
+/* NB. Rings != 0 get access up to 0xFC400000. This allows access to the */
+/* machine->physical mapping table. Ring 0 can access all memory. */
+ENTRY(gdt_table)
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x0000000000000000 /* not used */
+ .quad 0x00cfba000000c3ff /* 0x11 ring 1 3.95GB code at 0x0 */
+ .quad 0x00cfb2000000c3ff /* 0x19 ring 1 3.95GB data at 0x0 */
+ .quad 0x00cffa000000c3ff /* 0x23 ring 3 3.95GB code at 0x0 */
+ .quad 0x00cff2000000c3ff /* 0x2b ring 3 3.95GB data at 0x0 */
+ .quad 0x00cf9a000000ffff /* 0x30 ring 0 4.00GB code at 0x0 */
+ .quad 0x00cf92000000ffff /* 0x38 ring 0 4.00GB data at 0x0 */
+ .fill NR_CPUS,8,0 /* space for TSS's */
+
+# The following adds 12kB to the kernel file size.
+ .org 0x1000
+ENTRY(idle0_pg_table)
+ .org 0x2000
+ENTRY(idle0_task_union)
+ .org 0x4000
+ENTRY(stext)
+ENTRY(_stext)
diff --git a/xen/arch/i386/delay.c b/xen/arch/i386/delay.c
new file mode 100644
index 0000000000..078ff77344
--- /dev/null
+++ b/xen/arch/i386/delay.c
@@ -0,0 +1,29 @@
+/*
+ * Precise Delay Loops for i386
+ *
+ * Copyright (C) 1993 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * The __delay function must _NOT_ be inlined as its execution time
+ * depends wildly on alignment on many x86 processors. The additional
+ * jump magic is needed to get the timing stable on all the CPU's
+ * we have to worry about.
+ */
+
+#include <xeno/config.h>
+#include <xeno/delay.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+
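+/*
+ * Example with illustrative numbers: on a 2GHz TSC, ticks_per_usec is
+ * about 2000, so __udelay(50) spins until roughly 100000 TSC ticks have
+ * elapsed. rep_nop() issues the "pause" hint so the busy-wait is polite
+ * to the other logical CPU on hyperthreaded parts.
+ */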
+void __udelay(unsigned long usecs)
+{
+ unsigned long ticks = usecs * ticks_per_usec;
+ unsigned long s, e;
+
+ rdtscl(s);
+ do
+ {
+ rep_nop();
+ rdtscl(e);
+ } while ((e-s) < ticks);
+}
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S
new file mode 100644
index 0000000000..928a96ed4e
--- /dev/null
+++ b/xen/arch/i386/entry.S
@@ -0,0 +1,534 @@
+/*
+ * linux/arch/i386/entry.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * Stack layout in 'ret_from_system_call':
+ * 0(%esp) - %ebx
+ * 4(%esp) - %ecx
+ * 8(%esp) - %edx
+ * C(%esp) - %esi
+ * 10(%esp) - %edi
+ * 14(%esp) - %ebp
+ * 18(%esp) - %eax
+ * 1C(%esp) - %ds
+ * 20(%esp) - %es
+ * 24(%esp) - orig_eax
+ * 28(%esp) - %eip
+ * 2C(%esp) - %cs
+ * 30(%esp) - %eflags
+ * 34(%esp) - %oldesp
+ * 38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+/* The idea for callbacks from monitor -> guest OS.
+ *
+ * First, we require that all callbacks (either via a supplied
+ * interrupt-descriptor-table, or via the special event or failsafe callbacks
+ * in the shared-info-structure) are to ring 1. This just makes life easier,
+ * in that it means we don't have to do messy GDT/LDT lookups to find
+ * out the privilege level of the return code-selector. That code
+ * would just be a hassle to write, and would need to account for running
+ * off the end of the GDT/LDT, for example. The event callback has quite
+ * a constrained callback method: the guest OS provides a linear address
+ * which we call back to using the hard-coded __GUEST_CS descriptor (which
+ * is a ring 1 descriptor). For IDT callbacks, we check that the provided
+ * return CS is not == __HYPERVISOR_{CS,DS}. Apart from that we're safe as we
+ * don't allow a guest OS to install ring-0 privileges into the GDT/LDT.
+ * It's up to the guest OS to ensure all returns via the IDT are to ring 1.
+ * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather
+ * than the correct ring) and bad things are bound to ensue -- IRET is
+ * likely to fault, and we may end up killing the domain (no harm can
+ * come to the hypervisor itself, though).
+ *
+ * When doing a callback, we check if the return CS is in ring 0. If so,
+ * callback is delayed until next return to ring != 0.
+ * If return CS is in ring 1, then we create a callback frame
+ * starting at return SS/ESP. The base of the frame does an intra-privilege
+ * interrupt-return.
+ * If return CS is in ring > 1, we create a callback frame starting
+ * at SS/ESP taken from appropriate section of the current TSS. The base
+ * of the frame does an inter-privilege interrupt-return.
+ *
+ * Note that the "failsafe callback" uses a special stackframe:
+ * { return_DS, return_ES, return_EIP, return_CS, return_EFLAGS, ... }
+ * That is, original values for DS/ES are placed on stack rather than
+ * in DS/ES themselves. Why? It saves us loading them, only to have them
+ * saved/restored in guest OS. Furthermore, if we load them we may cause
+ * a fault if they are invalid, which is a hassle to deal with. We avoid
+ * that problem if we don't load them :-) This property allows us to use
+ * the failsafe callback as a fallback: if we ever fault on loading DS/ES
+ * on return to ring != 0, we can simply package it up as a return via
+ * the failsafe callback, and let the guest OS sort it out (perhaps by
+ * killing an application process). Note that we also do this for any
+ * faulting IRET -- just let the guest OS handle it via the event
+ * callback.
+ *
+ * We terminate a domain in the following cases:
+ * - creating a callback stack frame (due to bad ring-1 stack).
+ * - faulting IRET on entry to failsafe callback handler.
+ * So, each domain must keep its ring-1 %ss/%esp and failsafe callback
+ * handler in good order (absolutely no faults allowed!).
+ */
+
+#include <xeno/config.h>
+#include <asm/smp.h>
+
+EBX = 0x00
+ECX = 0x04
+EDX = 0x08
+ESI = 0x0C
+EDI = 0x10
+EBP = 0x14
+EAX = 0x18
+DS = 0x1C
+ES = 0x20
+ORIG_EAX = 0x24
+EIP = 0x28
+CS = 0x2C
+EFLAGS = 0x30
+OLDESP = 0x34
+OLDSS = 0x38
+
+/* Offsets in task_struct */
+PROCESSOR = 0
+STATE = 4
+HYP_EVENTS = 8
+DOMAIN = 12
+SHARED_INFO = 16
+
+/* Offsets in shared_info_t */
+EVENTS = 0
+EVENTS_ENABLE = 4
+EVENT_ADDR = 8
+FAILSAFE_ADDR = 12
+
+/* Offsets in guest_trap_bounce */
+GTB_ERROR_CODE = 0
+GTB_CR2 = 4
+GTB_FLAGS = 8
+GTB_CS = 10
+GTB_EIP = 12
+GTBF_TRAP = 1
+GTBF_TRAP_NOCODE = 2
+GTBF_TRAP_CR2 = 4
+
+CF_MASK = 0x00000001
+IF_MASK = 0x00000200
+NT_MASK = 0x00004000
+
+#define SAVE_ALL \
+ cld; \
+ pushl %es; \
+ pushl %ds; \
+ pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
+ pushl %ebx; \
+ movl $(__HYPERVISOR_DS),%edx; \
+ movl %edx,%ds; \
+ movl %edx,%es;
+
+#define RESTORE_ALL \
+ popl %ebx; \
+ popl %ecx; \
+ popl %edx; \
+ popl %esi; \
+ popl %edi; \
+ popl %ebp; \
+ popl %eax; \
+1: popl %ds; \
+2: popl %es; \
+ addl $4,%esp; \
+3: iret; \
+.section .fixup,"ax"; \
+6: subl $4,%esp; \
+ pushl %es; \
+5: pushl %ds; \
+4: pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
+ pushl %ebx; \
+ pushl %ss; \
+ popl %ds; \
+ pushl %ss; \
+ popl %es; \
+ jmp failsafe_callback; \
+.previous; \
+.section __ex_table,"a"; \
+ .align 4; \
+ .long 1b,4b; \
+ .long 2b,5b; \
+ .long 3b,6b; \
+.previous
+
+#define GET_CURRENT(reg) \
+ movl $-8192, reg; \
+ andl %esp, reg
+
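+/*
+ * GET_CURRENT relies on each task_struct sitting at the base of its
+ * 8KB (2^13-byte) stack: masking %esp with -8192 (0xffffe000) rounds
+ * down to that base. E.g. %esp = 0xc6809f40 gives current = 0xc6808000.
+ */
+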
+ENTRY(ret_from_newdomain)
+ GET_CURRENT(%ebx)
+ jmp test_all_events
+
+ ALIGN
+restore_all:
+ RESTORE_ALL
+
+ ALIGN
+ENTRY(hypervisor_call)
+ pushl %eax # save orig_eax
+ SAVE_ALL
+ GET_CURRENT(%ebx)
+ andl $255,%eax
+ call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4)
+ movl %eax,EAX(%esp) # save the return value
+
+test_all_events:
+ mov PROCESSOR(%ebx),%eax
+ shl $4,%eax # sizeof(irq_cpustat) == 16
+ lea guest_trap_bounce(%eax),%edx
+ cli # tests must not race interrupts
+ xorl %ecx,%ecx
+ notl %ecx
+test_softirqs:
+ mov PROCESSOR(%ebx),%eax
+ shl $4,%eax # sizeof(irq_cpustat) == 16
+ test %ecx,SYMBOL_NAME(irq_stat)(%eax,1)
+ jnz process_softirqs
+test_hyp_events:
+ test %ecx, HYP_EVENTS(%ebx)
+ jnz process_hyp_events
+test_guest_events:
+ movl SHARED_INFO(%ebx),%eax
+ test %ecx,EVENTS(%eax)
+ jz restore_all
+ test %ecx,EVENTS_ENABLE(%eax)
+ jz restore_all
+ /* Prevent unnecessary reentry of event callback (stack overflow!) */
+ xorl %ecx,%ecx
+ movl %ecx,EVENTS_ENABLE(%eax)
+/* %eax == shared_info, %ebx == task_struct, %edx == guest_trap_bounce */
+process_guest_events:
+ movl EVENT_ADDR(%eax),%eax
+ movl %eax,GTB_EIP(%edx)
+ movw $__GUEST_CS,GTB_CS(%edx)
+ call create_bounce_frame
+ jmp restore_all
+
+ ALIGN
+process_softirqs:
+ push %edx
+ call SYMBOL_NAME(do_softirq)
+ pop %edx
+ jmp test_hyp_events
+
+ ALIGN
+process_hyp_events:
+ sti
+ call SYMBOL_NAME(do_hyp_events)
+ jmp test_all_events
+
+/* No special register assumptions */
+failsafe_callback:
+ GET_CURRENT(%ebx)
+ mov PROCESSOR(%ebx),%eax
+ shl $4,%eax
+ lea guest_trap_bounce(%eax),%edx
+ movl SHARED_INFO(%ebx),%eax
+ movl FAILSAFE_ADDR(%eax),%eax
+ movl %eax,GTB_EIP(%edx)
+ movw $__GUEST_CS,GTB_CS(%edx)
+ call create_bounce_frame
+ subl $8,%esi # add DS/ES to failsafe stack frame
+ movl DS(%esp),%eax
+FAULT1: movl %eax,(%esi)
+ movl ES(%esp),%eax
+FAULT2: movl %eax,4(%esi)
+ movl %esi,OLDESP(%esp)
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ addl $12,%esp
+FAULT3: iret
+
+
+/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */
+/* {EIP, CS, EFLAGS, [ESP, SS]} */
+/* %edx == guest_trap_bounce, %ebx == task_struct */
+/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP. */
+create_bounce_frame:
+ mov CS+4(%esp),%cl
+ test $2,%cl
+ jz 1f /* jump if returning to an existing ring-1 activation */
+ /* obtain ss/esp from TSS -- no current ring-1 activations */
+ movl PROCESSOR(%ebx),%eax
+ shll $8,%eax /* multiply by 256 */
+ addl $init_tss + 12,%eax
+ movl (%eax),%esi /* tss->esp1 */
+FAULT4: movl 4(%eax),%ds /* tss->ss1 */
+ /* base of stack frame must contain ss/esp (inter-priv iret) */
+ subl $8,%esi
+ movl OLDESP+4(%esp),%eax
+FAULT5: movl %eax,(%esi)
+ movl OLDSS+4(%esp),%eax
+FAULT6: movl %eax,4(%esi)
+ jmp 2f
+1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
+ movl OLDESP+4(%esp),%esi
+FAULT7: movl OLDSS+4(%esp),%ds
+2: /* Construct a stack frame: EFLAGS, CS/EIP */
+ subl $12,%esi
+ movl EIP+4(%esp),%eax
+FAULT8: movl %eax,(%esi)
+ movl CS+4(%esp),%eax
+FAULT9: movl %eax,4(%esi)
+ movl EFLAGS+4(%esp),%eax
+FAULT10:movl %eax,8(%esi)
+ /* Rewrite our stack frame and return to ring 1. */
+ /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
+ andl $0xfffcbeff,%eax
+ movl %eax,EFLAGS+4(%esp)
+ movl %ds,OLDSS+4(%esp)
+ movl %esi,OLDESP+4(%esp)
+ movzwl %es:GTB_CS(%edx),%eax
+ movl %eax,CS+4(%esp)
+ movl %es:GTB_EIP(%edx),%eax
+ movl %eax,EIP+4(%esp)
+ ret
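+
+/*
+ * The frame built by create_bounce_frame, seen from the guest (an
+ * illustrative layout, lowest address first):
+ *
+ * new ring-1 %esp -> [ EIP, CS, EFLAGS, {ESP, SS} ]
+ *
+ * ESP/SS are only present in the inter-privilege case, i.e. when no
+ * ring-1 activation existed. The failsafe path above additionally
+ * prepends the saved DS/ES, matching the layout described at the top
+ * of this file.
+ */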
+
+
+.section __ex_table,"a"
+ .align 4
+ .long FAULT1, kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT2, kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT3, kill_domain_fixup1 # Fault executing failsafe iret
+ .long FAULT4, kill_domain_fixup2 # Fault loading ring-1 stack selector
+ .long FAULT5, kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT6, kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT7, kill_domain_fixup2 # Fault loading ring-1 stack selector
+ .long FAULT8, kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT9, kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT10,kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT11,kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT12,kill_domain_fixup3 # Fault writing to ring-1 stack
+.previous
+
+# This handler kills domains which experience unrecoverable faults.
+.section .fixup,"ax"
+kill_domain_fixup1:
+ subl $4,%esp
+ SAVE_ALL
+ jmp kill_domain
+kill_domain_fixup2:
+ addl $4,%esp
+kill_domain_fixup3:
+ pushl %ss
+ popl %ds
+ jmp kill_domain
+.previous
+
+ ALIGN
+process_guest_exception_and_events:
+ mov PROCESSOR(%ebx),%eax
+ shl $4,%eax # sizeof(irq_cpustat) == 16
+ lea guest_trap_bounce(%eax),%edx
+ testb $~0,GTB_FLAGS(%edx)
+ jz test_all_events
+ call create_bounce_frame # just the basic frame
+ mov %es:GTB_FLAGS(%edx),%cl
+ test $GTBF_TRAP_NOCODE,%cl
+ jnz 2f
+ subl $4,%esi # push error_code onto guest frame
+ movl %es:GTB_ERROR_CODE(%edx),%eax
+FAULT11:movl %eax,(%esi)
+ test $GTBF_TRAP_CR2,%cl
+ jz 1f
+ subl $4,%esi # push %cr2 onto guest frame
+ movl %es:GTB_CR2(%edx),%eax
+FAULT12:movl %eax,(%esi)
+1: movl %esi,OLDESP(%esp)
+2: push %es # unclobber %ds
+ pop %ds
+ movb $0,GTB_FLAGS(%edx)
+ jmp test_all_events
+
+ ALIGN
+ENTRY(ret_from_intr)
+ GET_CURRENT(%ebx)
+ movb CS(%esp),%al
+ testb $3,%al # return to non-supervisor?
+ jne test_all_events
+ jmp restore_all
+
+ ALIGN
+ret_from_exception:
+ movb CS(%esp),%al
+ testb $3,%al # return to non-supervisor?
+ jne process_guest_exception_and_events
+ jmp restore_all
+
+ ALIGN
+
+ENTRY(divide_error)
+ pushl $0 # no error code
+ pushl $ SYMBOL_NAME(do_divide_error)
+ ALIGN
+error_code:
+ pushl %ds
+ pushl %eax
+ xorl %eax,%eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ decl %eax # eax = -1
+ pushl %ecx
+ pushl %ebx
+ cld
+ movl %es,%ecx
+ movl ORIG_EAX(%esp), %esi # get the error code
+ movl ES(%esp), %edi # get the function address
+ movl %eax, ORIG_EAX(%esp)
+ movl %ecx, ES(%esp)
+ movl %esp,%edx
+ pushl %esi # push the error code
+ pushl %edx # push the pt_regs pointer
+ movl $(__HYPERVISOR_DS),%edx
+ movl %edx,%ds
+ movl %edx,%es
+ GET_CURRENT(%ebx)
+ call *%edi
+ addl $8,%esp
+ jmp ret_from_exception
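+
+/*
+ * On the shuffle above: each exception stub pushes an error code (or 0)
+ * and its C handler's address, so on entry to error_code those two
+ * words occupy the ORIG_EAX and ES slots of the would-be pt_regs frame.
+ * They are pulled into %esi/%edi, and the slots are overwritten with -1
+ * (no syscall) and the saved %es, leaving a well-formed frame before
+ * the indirect call.
+ */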
+
+ENTRY(coprocessor_error)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_coprocessor_error)
+ jmp error_code
+
+ENTRY(simd_coprocessor_error)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+ jmp error_code
+
+ENTRY(device_not_available)
+ pushl $0
+ pushl $SYMBOL_NAME(math_state_restore)
+ jmp error_code
+
+ENTRY(debug)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_debug)
+ jmp error_code
+
+ENTRY(nmi)
+ pushl %eax
+ SAVE_ALL
+ movl %esp,%edx
+ pushl $0
+ pushl %edx
+ call SYMBOL_NAME(do_nmi)
+ addl $8,%esp
+ RESTORE_ALL
+
+ENTRY(int3)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_int3)
+ jmp error_code
+
+ENTRY(overflow)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_overflow)
+ jmp error_code
+
+ENTRY(bounds)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_bounds)
+ jmp error_code
+
+ENTRY(invalid_op)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_invalid_op)
+ jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
+ jmp error_code
+
+ENTRY(double_fault)
+ pushl $ SYMBOL_NAME(do_double_fault)
+ jmp error_code
+
+ENTRY(invalid_TSS)
+ pushl $ SYMBOL_NAME(do_invalid_TSS)
+ jmp error_code
+
+ENTRY(segment_not_present)
+ pushl $ SYMBOL_NAME(do_segment_not_present)
+ jmp error_code
+
+ENTRY(stack_segment)
+ pushl $ SYMBOL_NAME(do_stack_segment)
+ jmp error_code
+
+ENTRY(general_protection)
+ pushl $ SYMBOL_NAME(do_general_protection)
+ jmp error_code
+
+ENTRY(alignment_check)
+ pushl $ SYMBOL_NAME(do_alignment_check)
+ jmp error_code
+
+ENTRY(page_fault)
+ pushl $ SYMBOL_NAME(do_page_fault)
+ jmp error_code
+
+ENTRY(machine_check)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_machine_check)
+ jmp error_code
+
+ENTRY(spurious_interrupt_bug)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
+ jmp error_code
+
+.data
+ENTRY(hypervisor_call_table)
+ .long SYMBOL_NAME(do_set_trap_table)
+ .long SYMBOL_NAME(do_process_page_updates)
+ .long SYMBOL_NAME(do_console_write)
+ .long SYMBOL_NAME(do_set_gdt)
+ .long SYMBOL_NAME(do_stack_and_ldt_switch)
+ .long SYMBOL_NAME(do_net_update)
+ .long SYMBOL_NAME(do_fpu_taskswitch)
+ .long SYMBOL_NAME(do_sched_op)
+ .long SYMBOL_NAME(kill_domain)
+ .long SYMBOL_NAME(do_dom0_op)
+ .long SYMBOL_NAME(do_network_op)
+ .long SYMBOL_NAME(do_block_io_op)
+ .long SYMBOL_NAME(do_set_debugreg)
+ .long SYMBOL_NAME(do_get_debugreg)
+ .long SYMBOL_NAME(do_update_descriptor)
+ .long SYMBOL_NAME(do_set_fast_trap)
+ .rept NR_syscalls-(.-hypervisor_call_table)/4
+ .long SYMBOL_NAME(sys_ni_syscall)
+ .endr
diff --git a/xen/arch/i386/extable.c b/xen/arch/i386/extable.c
new file mode 100644
index 0000000000..4cd9f064c3
--- /dev/null
+++ b/xen/arch/i386/extable.c
@@ -0,0 +1,62 @@
+/*
+ * linux/arch/i386/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+static inline unsigned long
+search_one_table(const struct exception_table_entry *first,
+ const struct exception_table_entry *last,
+ unsigned long value)
+{
+ while (first <= last) {
+ const struct exception_table_entry *mid;
+ long diff;
+
+ mid = (last - first) / 2 + first;
+ diff = mid->insn - value;
+ if (diff == 0)
+ return mid->fixup;
+ else if (diff < 0)
+ first = mid+1;
+ else
+ last = mid-1;
+ }
+ return 0;
+}
+
+extern spinlock_t modlist_lock;
+
+unsigned long
+search_exception_table(unsigned long addr)
+{
+ unsigned long ret = 0;
+
+#ifndef CONFIG_MODULES
+ /* There is only the kernel to search. */
+ ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr);
+ return ret;
+#else
+ unsigned long flags;
+ /* The kernel is the last "module" -- no need to treat it special. */
+ struct module *mp;
+
+ spin_lock_irqsave(&modlist_lock, flags);
+ for (mp = module_list; mp != NULL; mp = mp->next) {
+ if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING)))
+ continue;
+ ret = search_one_table(mp->ex_table_start,
+ mp->ex_table_end - 1, addr);
+ if (ret)
+ break;
+ }
+ spin_unlock_irqrestore(&modlist_lock, flags);
+ return ret;
+#endif
+}
diff --git a/xen/arch/i386/i387.c b/xen/arch/i386/i387.c
new file mode 100644
index 0000000000..fe34ff16f5
--- /dev/null
+++ b/xen/arch/i386/i387.c
@@ -0,0 +1,56 @@
+/*
+ * linux/arch/i386/kernel/i387.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+
+void init_fpu(void)
+{
+ __asm__("fninit");
+ if ( cpu_has_xmm ) load_mxcsr(0x1f80);
+ current->flags |= PF_DONEFPUINIT;
+}
+
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+ if ( cpu_has_fxsr ) {
+ asm volatile( "fxsave %0 ; fnclex"
+ : "=m" (tsk->thread.i387.fxsave) );
+ } else {
+ asm volatile( "fnsave %0 ; fwait"
+ : "=m" (tsk->thread.i387.fsave) );
+ }
+ tsk->flags &= ~PF_USEDFPU;
+}
+
+void save_init_fpu( struct task_struct *tsk )
+{
+ /*
+ * The guest OS may have set the 'virtual STTS' flag.
+ * This causes us to set the real flag, so we'll need
+ * to temporarily clear it while saving f-p state.
+ */
+ if ( tsk->flags & PF_GUEST_STTS ) clts();
+ __save_init_fpu(tsk);
+ stts();
+}
+
+void restore_fpu( struct task_struct *tsk )
+{
+ if ( cpu_has_fxsr ) {
+ asm volatile( "fxrstor %0"
+ : : "m" (tsk->thread.i387.fxsave) );
+ } else {
+ asm volatile( "frstor %0"
+ : : "m" (tsk->thread.i387.fsave) );
+ }
+}
diff --git a/xen/arch/i386/i8259.c b/xen/arch/i386/i8259.c
new file mode 100644
index 0000000000..645b7b0fef
--- /dev/null
+++ b/xen/arch/i386/i8259.c
@@ -0,0 +1,481 @@
+/******************************************************************************
+ * i8259.c
+ *
+ * Well, this is required for SMP systems as well, as it builds interrupt
+ * tables for IO-APICs as well as uniprocessor 8259-alikes.
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <asm/ptrace.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/desc.h>
+#include <asm/bitops.h>
+#include <xeno/delay.h>
+#include <asm/apic.h>
+
+
+/*
+ * Common place to define all x86 IRQ vectors
+ *
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that save
+ * register context and call do_IRQ(). do_IRQ() then does all the
+ * operations that are needed to keep the AT (or SMP IOAPIC)
+ * interrupt-controller happy.
+ */
+
+BUILD_COMMON_IRQ()
+
+#define BI(x,y) \
+ BUILD_IRQ(x##y)
+
+#define BUILD_16_IRQS(x) \
+ BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
+ BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
+ BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
+ BI(x,c) BI(x,d) BI(x,e) BI(x,f)
+
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x30-0x3f)
+ */
+ BUILD_16_IRQS(0x0)
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+ BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+ BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
+ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
+ BUILD_16_IRQS(0xc)
+#endif
+
+#undef BUILD_16_IRQS
+#undef BI
+
+
+/*
+ * The following vectors are part of the Linux architecture, there
+ * is no hardware IRQ pin equivalent for them, they are triggered
+ * through the ICC by us (IPIs)
+ */
+#ifdef CONFIG_SMP
+ BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR)
+ BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
+ BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+#endif
+
+/*
+ * every pentium local APIC has two 'local interrupts', with a
+ * soft-definable vector attached to both interrupts, one of
+ * which is a timer interrupt, the other one is error counter
+ * overflow. Linux uses the local APIC timer interrupt to get
+ * a much simpler SMP time architecture:
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+ BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
+ BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
+ BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+#endif
+
+#define IRQ(x,y) \
+ IRQ##x##y##_interrupt
+
+#define IRQLIST_16(x) \
+ IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
+ IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
+ IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
+ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
+
+ void (*interrupt[NR_IRQS])(void) = {
+ IRQLIST_16(0x0),
+
+#ifdef CONFIG_X86_IO_APIC
+ IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
+ IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
+ IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
+ IRQLIST_16(0xc)
+#endif
+ };
+
+#undef IRQ
+#undef IRQLIST_16
+
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes,
+ * plus some generic x86-specific things, if generic specifics make
+ * any sense at all.
+ * This file should become arch/i386/kernel/irq.c when the old irq.c
+ * moves to arch-independent land.
+ */
+
+spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
+
+static void end_8259A_irq (unsigned int irq)
+{
+ if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+ enable_8259A_irq(irq);
+}
+
+#define shutdown_8259A_irq disable_8259A_irq
+
+void mask_and_ack_8259A(unsigned int);
+
+static unsigned int startup_8259A_irq(unsigned int irq)
+{
+ enable_8259A_irq(irq);
+ return 0; /* never anything pending */
+}
+
+static struct hw_interrupt_type i8259A_irq_type = {
+ "XT-PIC",
+ startup_8259A_irq,
+ shutdown_8259A_irq,
+ enable_8259A_irq,
+ disable_8259A_irq,
+ mask_and_ack_8259A,
+ end_8259A_irq,
+ NULL
+};
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the IRQ mask for both 8259A interrupt controllers.
+ */
+static unsigned int cached_irq_mask = 0xffff;
+
+#define __byte(x,y) (((unsigned char *)&(y))[x])
+#define cached_21 (__byte(0,cached_irq_mask))
+#define cached_A1 (__byte(1,cached_irq_mask))
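+/*
+ * That is, cached_21 is the low byte of the mask (master PIC, data port
+ * 0x21) and cached_A1 the high byte (slave PIC, data port 0xA1). E.g. a
+ * cached_irq_mask of 0xfffb leaves only IRQ2, the cascade, unmasked.
+ */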
+
+/*
+ * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
+ * boards the timer interrupt is not really connected to any IO-APIC pin,
+ * it's fed to the master 8259A's IR0 line only.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * this 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
+ */
+unsigned long io_apic_irqs;
+
+void disable_8259A_irq(unsigned int irq)
+{
+ unsigned int mask = 1 << irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ cached_irq_mask |= mask;
+ if (irq & 8)
+ outb(cached_A1,0xA1);
+ else
+ outb(cached_21,0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void enable_8259A_irq(unsigned int irq)
+{
+ unsigned int mask = ~(1 << irq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ cached_irq_mask &= mask;
+ if (irq & 8)
+ outb(cached_A1,0xA1);
+ else
+ outb(cached_21,0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+int i8259A_irq_pending(unsigned int irq)
+{
+ unsigned int mask = 1<<irq;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ if (irq < 8)
+ ret = inb(0x20) & mask;
+ else
+ ret = inb(0xA0) & (mask >> 8);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ return ret;
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+ disable_irq_nosync(irq);
+ io_apic_irqs &= ~(1<<irq);
+ irq_desc[irq].handler = &i8259A_irq_type;
+ enable_irq(irq);
+}
+
+/*
+ * This function is expected to be called rarely. Switching between
+ * 8259A registers is slow.
+ * It must be called with the irq controller spinlock held.
+ */
+static inline int i8259A_irq_real(unsigned int irq)
+{
+ int value;
+ int irqmask = 1<<irq;
+
+ if (irq < 8) {
+ outb(0x0B,0x20); /* ISR register */
+ value = inb(0x20) & irqmask;
+ outb(0x0A,0x20); /* back to the IRR register */
+ return value;
+ }
+ outb(0x0B,0xA0); /* ISR register */
+ value = inb(0xA0) & (irqmask >> 8);
+ outb(0x0A,0xA0); /* back to the IRR register */
+ return value;
+}
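+
+/*
+ * (Writing 0x0B or 0x0A to a PIC command port is an OCW3 command that
+ * selects which register, ISR or IRR, subsequent reads will return.)
+ */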
+
+/*
+ * Careful! The 8259A is a fragile beast; it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI, and the order of EOI
+ * to the two 8259s is important!).
+ */
+void mask_and_ack_8259A(unsigned int irq)
+{
+ unsigned int irqmask = 1 << irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ /*
+ * Lightweight spurious IRQ detection. We do not want
+ * to overdo spurious IRQ handling - it's usually a sign
+ * of hardware problems, so we only do the checks we can
+ * do without slowing down good hardware unnecessarily.
+ *
+ * Note that IRQ7 and IRQ15 (the two spurious IRQs
+ * usually resulting from the 8259A-1|2 PICs) occur
+ * even if the IRQ is masked in the 8259A. Thus we
+ * can check spurious 8259A IRQs without doing the
+ * quite slow i8259A_irq_real() call for every IRQ.
+ * This does not cover 100% of spurious interrupts,
+ * but should be enough to warn the user that there
+ * is something bad going on ...
+ */
+ if (cached_irq_mask & irqmask)
+ goto spurious_8259A_irq;
+ cached_irq_mask |= irqmask;
+
+ handle_real_irq:
+ if (irq & 8) {
+ inb(0xA1); /* DUMMY - (do we need this?) */
+ outb(cached_A1,0xA1);
+ outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
+ outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */
+ } else {
+ inb(0x21); /* DUMMY - (do we need this?) */
+ outb(cached_21,0x21);
+ outb(0x60+irq,0x20); /* 'Specific EOI' to master */
+ }
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+ return;
+
+ spurious_8259A_irq:
+ /*
+ * this is the slow path - should happen rarely.
+ */
+ if (i8259A_irq_real(irq))
+ /*
+ * oops, the IRQ _is_ in service according to the
+ * 8259A - not spurious, go handle it.
+ */
+ goto handle_real_irq;
+
+ {
+ static int spurious_irq_mask;
+ /*
+ * At this point we can be sure the IRQ is spurious,
+ * let's ACK and report it (once per IRQ).
+ */
+ if (!(spurious_irq_mask & irqmask)) {
+ printk("spurious 8259A interrupt: IRQ%d.\n", irq);
+ spurious_irq_mask |= irqmask;
+ }
+ atomic_inc(&irq_err_count);
+ /*
+ * Theoretically we do not have to handle this IRQ,
+ * but in Linux this does not cause problems and is
+ * simpler for us.
+ */
+ goto handle_real_irq;
+ }
+}
+
+void __init init_8259A(int auto_eoi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ outb(0xff, 0x21); /* mask all of 8259A-1 */
+ outb(0xff, 0xA1); /* mask all of 8259A-2 */
+
+ /*
+ * outb_p - this has to work on a wide range of PC hardware.
+ */
+ outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */
+ outb_p(0x30 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
+ outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */
+ if (auto_eoi)
+ outb_p(0x03, 0x21); /* master does Auto EOI */
+ else
+ outb_p(0x01, 0x21); /* master expects normal EOI */
+
+ outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */
+ outb_p(0x30 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
+ outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */
+ outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode
+ is to be investigated) */
+
+ if (auto_eoi)
+ /*
+ * in AEOI mode we just have to mask the interrupt
+ * when acking.
+ */
+ i8259A_irq_type.ack = disable_8259A_irq;
+ else
+ i8259A_irq_type.ack = mask_and_ack_8259A;
+
+ udelay(100); /* wait for 8259A to initialize */
+
+ outb(cached_21, 0x21); /* restore master IRQ mask */
+ outb(cached_A1, 0xA1); /* restore slave IRQ mask */
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+
+static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
+
+void __init init_ISA_irqs (void)
+{
+ int i;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ init_bsp_APIC();
+#endif
+ init_8259A(0);
+
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc[i].status = IRQ_DISABLED;
+ irq_desc[i].action = 0;
+ irq_desc[i].depth = 1;
+
+ if (i < 16) {
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ irq_desc[i].handler = &i8259A_irq_type;
+ } else {
+ /*
+ * 'high' PCI IRQs filled in on demand
+ */
+ irq_desc[i].handler = &no_irq_type;
+ }
+ }
+}
+
+void __init init_IRQ(void)
+{
+ int i;
+
+ init_ISA_irqs();
+
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i < NR_IRQS; i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ if (vector != HYPERVISOR_CALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * IRQ0 must be given a fixed assignment and initialized,
+ * because it's used before the IO-APIC is set up.
+ */
+ set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+ /*
+ * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ * IPI, driven by wakeup.
+ */
+ set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt);
+
+ /* IPI for invalidation */
+ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+ /* IPI for generic function call */
+ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ /* self generated IPI for local APIC timer */
+ set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+ /* IPI vectors for APIC spurious and error interrupts */
+ set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+ set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+#endif
+
+ /*
+ * Set the clock to HZ Hz, we already have a valid
+ * vector now:
+ */
+#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
+#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
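+ /* e.g. with HZ=100: LATCH = (1193180+50)/100 = 11932, so channel 0
+ reloads from 11932 at 1.19318MHz, i.e. one interrupt every ~10ms. */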
+ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff , 0x40); /* LSB */
+ outb(LATCH >> 8 , 0x40); /* MSB */
+
+ setup_irq(2, &irq2);
+}
+
+/*
+ * We only need the timer interrupt for calibrating the tsc<->time<->bus-cycle
+ * mappings. After this, all timing-related functions should run off the
+ * APIC timers. This function allows us to disable the PIT once that is done.
+ */
+void __init disable_pit(void)
+{
+ printk("Disable PIT. Not needed anymore\n");
+ /* This is not the most elegant way, but hey. */
+ disable_irq(0);
+}
diff --git a/xen/arch/i386/idle0_task.c b/xen/arch/i386/idle0_task.c
new file mode 100644
index 0000000000..0d2b9e40bf
--- /dev/null
+++ b/xen/arch/i386/idle0_task.c
@@ -0,0 +1,20 @@
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/desc.h>
+
+/*
+ * Initial task structure. XXX KAF: To get this 8192-byte aligned without
+ * linker tricks I copy it into aligned BSS area at boot time.
+ * Actual name idle0_task_union now declared in boot.S.
+ */
+struct task_struct first_task_struct = IDLE0_TASK(idle0_task_union.task);
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
+
diff --git a/xen/arch/i386/io_apic.c b/xen/arch/i386/io_apic.c
new file mode 100644
index 0000000000..878c189afb
--- /dev/null
+++ b/xen/arch/i386/io_apic.c
@@ -0,0 +1,1683 @@
+/*
+ * Intel IO-APIC support for multi-Pentium hosts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ * Many thanks to Stig Venaas for trying out countless experimental
+ * patches and reporting/debugging problems patiently!
+ *
+ * (c) 1999, Multiple IO-APIC support, developed by
+ * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ * further tested and cleaned up by Zach Brown <zab@redhat.com>
+ * and Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+#include <xeno/delay.h>
+#include <xeno/sched.h>
+#include <xeno/config.h>
+#include <asm/mc146818rtc.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/smpboot.h>
+
+
+static unsigned int nmi_watchdog; /* XXXX XEN */
+
+#undef APIC_LOCKUP_DEBUG
+
+#define APIC_LOCKUP_DEBUG
+
+static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
+
+unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL;
+unsigned char int_delivery_mode = dest_LowestPrio;
+
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+ int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+ static int first_free_entry = NR_IRQS;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (entry->next)
+ entry = irq_2_pin + entry->next;
+
+ if (entry->pin != -1) {
+ entry->next = first_free_entry;
+ entry = irq_2_pin + entry->next;
+ if (++first_free_entry >= PIN_MAP_SIZE)
+ panic("io_apic.c: whoops");
+ }
+ entry->apic = apic;
+ entry->pin = pin;
+}
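+
+/*
+ * Illustration of the data structure: irq_2_pin is a set of short
+ * singly-linked lists threaded through one array. Slot irq is the list
+ * head, e.g. { apic = 0, pin = 4, next = 0 } for ISA IRQ4 on pin 4 of
+ * the first IO-APIC; a second (apic, pin) sharing the IRQ goes into a
+ * spare slot at index >= NR_IRQS, chained via the 'next' index.
+ */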
+
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (1) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ }
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+}
+
+#define __DO_ACTION(R, ACTION, FINAL) \
+ \
+{ \
+ int pin; \
+ struct irq_pin_list *entry = irq_2_pin + irq; \
+ \
+ for (;;) { \
+ unsigned int reg; \
+ pin = entry->pin; \
+ if (pin == -1) \
+ break; \
+ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
+ reg ACTION; \
+ io_apic_modify(entry->apic, reg); \
+ if (!entry->next) \
+ break; \
+ entry = irq_2_pin + entry->next; \
+ } \
+ FINAL; \
+}
+
+#define DO_ACTION(name,R,ACTION, FINAL) \
+ \
+ static void name##_IO_APIC_irq (unsigned int irq) \
+ __DO_ACTION(R, ACTION, FINAL)
+
+DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
+ /* mask = 1 */
+DO_ACTION( __unmask, 0, &= 0xfffeffff, )
+ /* mask = 0 */
+DO_ACTION( __mask_and_edge, 0, = (reg & 0xffff7fff) | 0x00010000, )
+ /* mask = 1, trigger = 0 */
+DO_ACTION( __unmask_and_level, 0, = (reg & 0xfffeffff) | 0x00008000, )
+ /* mask = 0, trigger = 1 */
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __mask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 1;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ clear_IO_APIC_pin(apic, pin);
+}
+
+/*
+ * Support for broken MP BIOSes: enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+int pirq_entries [MAX_PIRQS];
+int pirqs_enabled;
+
+int skip_ioapic_setup;
+#if 0
+
+static int __init noioapic_setup(char *str)
+{
+ skip_ioapic_setup = 1;
+ return 1;
+}
+
+__setup("noapic", noioapic_setup);
+
+static int __init ioapic_setup(char *str)
+{
+ skip_ioapic_setup = 0;
+ return 1;
+}
+
+__setup("apic", ioapic_setup);
+
+
+
+static int __init ioapic_pirq_setup(char *str)
+{
+ int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
+
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ pirqs_enabled = 1;
+ printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
+
+ for (i = 0; i < max; i++) {
+ printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+ }
+ return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+#endif
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int __init find_irq_entry(int apic, int pin, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_irqtype == type &&
+ (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+ mp_irqs[i].mpc_dstirq == pin)
+ return i;
+
+ return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+
+ return mp_irqs[i].mpc_dstirq;
+ }
+ return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+ int apic, i, best_guess = -1;
+
+ Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) {
+ printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+ break;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+ !mp_irqs[i].mpc_irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+ if (!(apic || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+ return irq;
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0)
+ best_guess = irq;
+ }
+ }
+ return best_guess;
+}
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int __init EISA_ELCR(unsigned int irq)
+{
+ if (irq < 16) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
+}
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx) (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx) (0)
+#define default_ISA_polarity(idx) (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx) (1)
+#define default_PCI_polarity(idx) (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx) (1)
+#define default_MCA_polarity(idx) (0)
+
+static int __init MPBIOS_polarity(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int polarity;
+
+ /*
+ * Determine IRQ line polarity (high active or low active):
+ */
+ switch (mp_irqs[idx].mpc_irqflag & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ polarity = default_ISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ polarity = default_EISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ polarity = default_PCI_polarity(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ polarity = default_MCA_polarity(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* high active */
+ {
+ polarity = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ case 3: /* low active */
+ {
+ polarity = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ return polarity;
+}
+
+static int __init MPBIOS_trigger(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int trigger;
+
+ /*
+ * Determine IRQ trigger mode (edge or level sensitive):
+ */
+ switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ trigger = default_ISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ trigger = default_EISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ trigger = default_PCI_trigger(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ trigger = default_MCA_trigger(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* edge */
+ {
+ trigger = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ case 3: /* level */
+ {
+ trigger = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 0;
+ break;
+ }
+ }
+ return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+ return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+ return MPBIOS_trigger(idx);
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+ int irq, i;
+ int bus = mp_irqs[idx].mpc_srcbus;
+
+ /*
+ * Debugging check, we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].mpc_dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ case MP_BUS_EISA:
+ case MP_BUS_MCA:
+ {
+ irq = mp_irqs[idx].mpc_srcbusirq;
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ /*
+ * PCI IRQs are mapped in order
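+				 * across IO-APICs: pin N of IO-APIC k gets IRQ number N
+				 * plus the total pin count of IO-APICs 0 .. k-1.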
+ */
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
+ break;
+ }
+ default:
+ {
+ printk(KERN_ERR "unknown bus type %d.\n",bus);
+ irq = 0;
+ break;
+ }
+ }
+
+ /*
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
+ */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin-16] != -1) {
+ if (!pirq_entries[pin-16]) {
+ printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
+ } else {
+ irq = pirq_entries[pin-16];
+ printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
+ pin-16, irq);
+ }
+ }
+ }
+ return irq;
+}
+
+static inline int IO_APIC_irq_trigger(int irq)
+{
+ int apic, idx, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+ return irq_trigger(idx);
+ }
+ }
+ /*
+ * nonexistent IRQs are edge default
+ */
+ return 0;
+}
+
+int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR, 0 };
+
+static int __init assign_irq_vector(int irq)
+{
+ static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+ if (IO_APIC_VECTOR(irq) > 0)
+ return IO_APIC_VECTOR(irq);
+next:
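+	/*
+	 * Step by 8 so consecutively allocated vectors are spread across
+	 * interrupt priority classes (the class is vector >> 4); once we
+	 * run past FIRST_SYSTEM_VECTOR, wrap around with a new offset.
+	 */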
+ current_vector += 8;
+
+ /* XXX Skip the guestOS -> Xen syscall vector! XXX */
+ if (current_vector == HYPERVISOR_CALL_VECTOR) goto next;
+ /* XXX Skip the Linux/BSD fast-trap vector! XXX */
+ if (current_vector == 0x80) goto next;
+
+#if 0
+ if (current_vector == SYSCALL_VECTOR)
+ goto next;
+#endif
+
+ if (current_vector > FIRST_SYSTEM_VECTOR) {
+ offset++;
+ current_vector = FIRST_DEVICE_VECTOR + offset;
+ }
+
+ if (current_vector == FIRST_SYSTEM_VECTOR)
+ panic("ran out of interrupt sources!");
+
+ IO_APIC_VECTOR(irq) = current_vector;
+ return current_vector;
+}
+
+extern void (*interrupt[NR_IRQS])(void);
+static struct hw_interrupt_type ioapic_level_irq_type;
+static struct hw_interrupt_type ioapic_edge_irq_type;
+
+void __init setup_IO_APIC_irqs(void)
+{
+ struct IO_APIC_route_entry entry;
+ int apic, pin, idx, irq, first_notcon = 1, vector;
+ unsigned long flags;
+
+ printk(KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
+ entry.mask = 0; /* enable IRQ */
+ entry.dest.logical.logical_dest = target_cpus();
+
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if (idx == -1) {
+ if (first_notcon) {
+ printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ first_notcon = 0;
+ } else
+ printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ continue;
+ }
+
+ entry.trigger = irq_trigger(idx);
+ entry.polarity = irq_polarity(idx);
+
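+		/*
+		 * Level-triggered entries start out masked; they are unmasked
+		 * when the IRQ is started up (startup_level_ioapic_irq).
+		 */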
+ if (irq_trigger(idx)) {
+ entry.trigger = 1;
+ entry.mask = 1;
+ }
+
+ irq = pin_2_irq(idx, apic, pin);
+ /*
+ * skip adding the timer int on secondary nodes, which causes
+ * a small but painful rift in the time-space continuum
+ */
+ if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ && (apic != 0) && (irq == 0))
+ continue;
+ else
+ add_pin_to_irq(irq, apic, pin);
+
+ if (!apic && !IO_APIC_IRQ(irq))
+ continue;
+
+ if (IO_APIC_IRQ(irq)) {
+ vector = assign_irq_vector(irq);
+ entry.vector = vector;
+
+ if (IO_APIC_irq_trigger(irq))
+ irq_desc[irq].handler = &ioapic_level_irq_type;
+ else
+ irq_desc[irq].handler = &ioapic_edge_irq_type;
+
+ set_intr_gate(vector, interrupt[irq]);
+
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
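+		/*
+		 * Each redirection entry is 64 bits, split across two registers:
+		 * 0x10+2*pin (low dword: vector, mask, trigger, polarity) and
+		 * 0x11+2*pin (high dword: destination). The high dword is
+		 * written first, then the low dword that carries the mask bit.
+		 */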
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ }
+
+ if (!first_notcon)
+ printk(" not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin as broadcast to all
+ * CPUs.
+ */
+void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry,0,sizeof(entry));
+
+ disable_8259A_irq(0);
+
+ /* mask LVT0 */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+ /*
+ * We use logical delivery to get the timer IRQ
+ * to the first CPU.
+ */
+ entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
+ entry.mask = 0; /* unmask IRQ now */
+ entry.dest.logical.logical_dest = target_cpus();
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.polarity = 0;
+ entry.trigger = 0;
+ entry.vector = vector;
+
+ /*
+ * The timer IRQ doesn't have to know that behind the
+ * scene we have a 8259A-master in AEOI mode ...
+ */
+ irq_desc[0].handler = &ioapic_edge_irq_type;
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ enable_8259A_irq(0);
+}
+
+void __init UNEXPECTED_IO_APIC(void)
+{
+ printk(KERN_WARNING
+ "An unexpected IO-APIC was found. If this kernel release is less than\n"
+ "three months old please report this to linux-smp@vger.kernel.org\n");
+}
+
+void __init print_IO_APIC(void)
+{
+ int apic, i;
+ struct IO_APIC_reg_00 reg_00;
+ struct IO_APIC_reg_01 reg_01;
+ struct IO_APIC_reg_02 reg_02;
+ unsigned long flags;
+
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for (i = 0; i < nr_ioapics; i++)
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+ mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+ /*
+ * We are a bit conservative about what we expect. We have to
+ * know about every hardware change ASAP.
+ */
+ printk(KERN_INFO "testing the IO APIC.......................\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ *(int *)&reg_01 = io_apic_read(apic, 1);
+ if (reg_01.version >= 0x10)
+ *(int *)&reg_02 = io_apic_read(apic, 2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk("\n");
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+ printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID);
+ if (reg_00.__reserved_1 || reg_00.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.entries);
+ if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */
+ (reg_01.entries != 0x17) && /* typical ISA+PCI boards */
+ (reg_01.entries != 0x1b) && /* Compaq Proliant boards */
+ (reg_01.entries != 0x1f) && /* dual Xeon boards */
+ (reg_01.entries != 0x22) && /* bigger Xeon boards */
+ (reg_01.entries != 0x2E) &&
+ (reg_01.entries != 0x3F)
+ )
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version);
+ if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */
+ (reg_01.version != 0x02) && /* VIA */
+ (reg_01.version != 0x10) && /* oldest IO-APICs */
+ (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
+ (reg_01.version != 0x13) && /* Xeon IO-APICs */
+ (reg_01.version != 0x20) /* Intel P64H (82806 AA) */
+ )
+ UNEXPECTED_IO_APIC();
+ if (reg_01.__reserved_1 || reg_01.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ if (reg_01.version >= 0x10) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration);
+ if (reg_02.__reserved_1 || reg_02.__reserved_2)
+ UNEXPECTED_IO_APIC();
+ }
+
+#if 0
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+ printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+ " Stat Dest Deli Vect: \n");
+
+ for (i = 0; i <= reg_01.entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk(KERN_DEBUG " %02x %03X %02X ",
+ i,
+ entry.dest.logical.logical_dest,
+ entry.dest.physical.physical_dest
+ );
+
+ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector
+ );
+ }
+ }
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for (i = 0; i < NR_IRQS; i++) {
+ struct irq_pin_list *entry = irq_2_pin + i;
+ if (entry->pin < 0)
+ continue;
+ printk(KERN_DEBUG "IRQ%d ", i);
+ for (;;) {
+ printk("-> %d:%d", entry->apic, entry->pin);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ printk("\n");
+#endif
+ }
+
+ printk(KERN_INFO ".................................... done.\n");
+
+ return;
+}
+
+static void print_APIC_bitfield (int base)
+{
+ unsigned int v;
+ int i, j;
+
+ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+ for (i = 0; i < 8; i++) {
+ v = apic_read(base + i*0x10);
+ for (j = 0; j < 32; j++) {
+ if (v & (1<<j))
+ printk("1");
+ else
+ printk("0");
+ }
+ printk("\n");
+ }
+}
+
+void /*__init*/ print_local_APIC(void * dummy)
+{
+ unsigned int v, ver, maxlvt;
+
+ printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), hard_smp_processor_id());
+ v = apic_read(APIC_ID);
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
+ v = apic_read(APIC_LVR);
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+ maxlvt = get_maxlvt();
+
+ v = apic_read(APIC_TASKPRI);
+ printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+ }
+
+ v = apic_read(APIC_EOI);
+ printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+ v = apic_read(APIC_RRR);
+ printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+ v = apic_read(APIC_LDR);
+ printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+ v = apic_read(APIC_DFR);
+ printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+ v = apic_read(APIC_SPIV);
+ printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+ printk(KERN_DEBUG "... APIC ISR field:\n");
+ print_APIC_bitfield(APIC_ISR);
+ printk(KERN_DEBUG "... APIC TMR field:\n");
+ print_APIC_bitfield(APIC_TMR);
+ printk(KERN_DEBUG "... APIC IRR field:\n");
+ print_APIC_bitfield(APIC_IRR);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_ICR);
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+ v = apic_read(APIC_ICR2);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+ v = apic_read(APIC_LVTT);
+ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+ if (maxlvt > 3) { /* PC is LVT#4. */
+ v = apic_read(APIC_LVTPC);
+ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+ }
+ v = apic_read(APIC_LVT0);
+ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+ v = apic_read(APIC_LVT1);
+ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+ if (maxlvt > 2) { /* ERR is LVT#3. */
+ v = apic_read(APIC_LVTERR);
+ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_TMICT);
+ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+ v = apic_read(APIC_TMCCT);
+ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+ v = apic_read(APIC_TDCR);
+ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+ printk("\n");
+}
+
+void print_all_local_APICs (void)
+{
+ smp_call_function(print_local_APIC, NULL, 1, 1);
+ print_local_APIC(NULL);
+}
+
+void /*__init*/ print_PIC(void)
+{
+ extern spinlock_t i8259A_lock;
+ unsigned int v, flags;
+
+ printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ v = inb(0xa1) << 8 | inb(0x21);
+ printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
+
+ v = inb(0xa0) << 8 | inb(0x20);
+ printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
+
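+	/*
+	 * OCW3: 0x0b selects the in-service register for the next read,
+	 * 0x0a switches the read-back register back to the IRR.
+	 */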
+ outb(0x0b,0xa0);
+ outb(0x0b,0x20);
+ v = inb(0xa0) << 8 | inb(0x20);
+ outb(0x0a,0xa0);
+ outb(0x0a,0x20);
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
+
+ v = inb(0x4d1) << 8 | inb(0x4d0);
+ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+static void __init enable_IO_APIC(void)
+{
+ struct IO_APIC_reg_01 reg_01;
+ int i;
+ unsigned long flags;
+
+ for (i = 0; i < PIN_MAP_SIZE; i++) {
+ irq_2_pin[i].pin = -1;
+ irq_2_pin[i].next = 0;
+ }
+ if (!pirqs_enabled)
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (i = 0; i < nr_ioapics; i++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_01 = io_apic_read(i, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[i] = reg_01.entries+1;
+ }
+
+ /*
+ * Do not trust the IO-APIC being empty at bootup
+ */
+ clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+ /*
+ * Clear the IO-APIC before rebooting:
+ */
+ clear_IO_APIC();
+
+ disconnect_bsp_APIC();
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc (void)
+{
+ struct IO_APIC_reg_00 reg_00;
+ unsigned long phys_id_present_map = phys_cpu_present_map;
+ int apic;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ if (clustered_apic_mode)
+ /* We don't have a good way to do this yet - hack */
+ phys_id_present_map = (u_long) 0xf;
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ /* Read the register 0 value */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ old_id = mp_ioapics[apic].mpc_apicid;
+
+ if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ reg_00.ID);
+ mp_ioapics[apic].mpc_apicid = reg_00.ID;
+ }
+
+ /*
+ * Sanity check, is the ID really free? Every APIC in a
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
+ */
+ if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) &&
+ (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ for (i = 0; i < 0xf; i++)
+ if (!(phys_id_present_map & (1 << i)))
+ break;
+ if (i >= apic_broadcast_id)
+ panic("Max APIC ID exceeded!\n");
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ i);
+ phys_id_present_map |= 1 << i;
+ mp_ioapics[apic].mpc_apicid = i;
+ } else {
+ printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
+ phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
+ }
+
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mp_ioapics[apic].mpc_apicid)
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_dstapic == old_id)
+ mp_irqs[i].mpc_dstapic
+ = mp_ioapics[apic].mpc_apicid;
+
+ /*
+ * Read the right value from the MPC table and
+ * write it into the ID register.
+ */
+ printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
+ mp_ioapics[apic].mpc_apicid);
+
+ reg_00.ID = mp_ioapics[apic].mpc_apicid;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0, *(int *)&reg_00);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
+ panic("could not set ID!\n");
+ else
+ printk(" ok.\n");
+ }
+}
+
+/*
+ * There is a nasty bug in some older SMP boards: their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ * - timer IRQ defaults to IO-APIC IRQ
+ * - if this function detects that timer IRQs are defunct, then we fall
+ * back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+ unsigned int t1 = jiffies;
+
+ sti();
+ /* Let ten ticks pass... */
+ mdelay((10 * 1000) / HZ);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+	 * glue logic does not lock up after the first one or
+	 * two ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+ if (jiffies - t1 > 4)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way, since then we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+#define enable_edge_ioapic_irq unmask_IO_APIC_irq
+
+static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
+
+/*
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need to
+ * return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+ int was_pending = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ if (irq < 16) {
+ disable_8259A_irq(irq);
+ if (i8259A_irq_pending(irq))
+ was_pending = 1;
+ }
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return was_pending;
+}
+
+#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+ if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+ == (IRQ_PENDING | IRQ_DISABLED))
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
+}
+
+static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
+
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * that startup needs to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+ unmask_IO_APIC_irq(irq);
+
+ return 0; /* don't check for pending */
+}
+
+#define shutdown_level_ioapic_irq mask_IO_APIC_irq
+#define enable_level_ioapic_irq unmask_IO_APIC_irq
+#define disable_level_ioapic_irq mask_IO_APIC_irq
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
+ unsigned long v;
+ int i;
+
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as an edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
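+	/*
+	 * The TMR is banked as eight 32-bit registers spaced 0x10 apart;
+	 * vector i lives at byte offset (i & ~0x1f) >> 1, bit (i & 0x1f).
+	 */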
+ i = IO_APIC_VECTOR(irq);
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+ ack_APIC_irq();
+
+ if (!(v & (1 << (i & 0x1f)))) {
+#ifdef APIC_LOCKUP_DEBUG
+ struct irq_pin_list *entry;
+#endif
+
+#ifdef APIC_MISMATCH_DEBUG
+ atomic_inc(&irq_mis_count);
+#endif
+ spin_lock(&ioapic_lock);
+ __mask_and_edge_IO_APIC_irq(irq);
+#ifdef APIC_LOCKUP_DEBUG
+ for (entry = irq_2_pin + irq;;) {
+ unsigned int reg;
+
+ if (entry->pin == -1)
+ break;
+ reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
+ if (reg & 0x00004000)
+ printk(KERN_CRIT "Aieee!!! Remote IRR"
+ " still set after unlock!\n");
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+#endif
+ __unmask_and_level_IO_APIC_irq(irq);
+ spin_unlock(&ioapic_lock);
+ }
+}
+
+static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ }
+
+static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+{
+ unsigned long flags;
+ /*
+	 * Only the low 8 bits of the mask are valid; they form the
+	 * logical destination field (bits 24-31 of the high dword).
+ */
+ mask = mask << 24;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __DO_ACTION(1, = mask, )
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+static struct hw_interrupt_type ioapic_edge_irq_type = {
+ "IO-APIC-edge",
+ startup_edge_ioapic_irq,
+ shutdown_edge_ioapic_irq,
+ enable_edge_ioapic_irq,
+ disable_edge_ioapic_irq,
+ ack_edge_ioapic_irq,
+ end_edge_ioapic_irq,
+ set_ioapic_affinity,
+};
+
+static struct hw_interrupt_type ioapic_level_irq_type = {
+ "IO-APIC-level",
+ startup_level_ioapic_irq,
+ shutdown_level_ioapic_irq,
+ enable_level_ioapic_irq,
+ disable_level_ioapic_irq,
+ mask_and_ack_level_ioapic_irq,
+ end_level_ioapic_irq,
+ set_ioapic_affinity,
+};
+
+static inline void init_IO_APIC_traps(void)
+{
+ int irq;
+
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+ if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+ * interrupt if we can..
+ */
+ if (irq < 16)
+ make_8259A_irq(irq);
+ else
+ /* Strange. Oh, well.. */
+ irq_desc[irq].handler = &no_irq_type;
+ }
+ }
+}
+
+static void enable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+ ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type = {
+ "local-APIC-edge",
+ NULL, /* startup_irq() not used for IRQ0 */
+ NULL, /* shutdown_irq() not used for IRQ0 */
+ enable_lapic_irq,
+ disable_lapic_irq,
+ ack_lapic_irq,
+ end_lapic_irq
+};
+
+static void enable_NMI_through_LVT0 (void * dummy)
+{
+ unsigned int v, ver;
+
+ ver = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(ver);
+ v = APIC_DM_NMI; /* unmask and set to NMI */
+ if (!APIC_INTEGRATED(ver)) /* 82489DX */
+ v |= APIC_LVT_LEVEL_TRIGGER;
+ apic_write_around(APIC_LVT0, v);
+}
+
+static void setup_nmi (void)
+{
+ /*
+ * Dirty trick to enable the NMI watchdog ...
+ * We put the 8259A master into AEOI mode and
+ * unmask on all local APICs LVT0 as NMI.
+ *
+ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+ * is from Maciej W. Rozycki - so we do not have to EOI from
+ * the NMI handler or the timer interrupt.
+ */
+ printk(KERN_INFO "activating NMI Watchdog ...");
+
+ smp_call_function(enable_NMI_through_LVT0, NULL, 1, 1);
+ enable_NMI_through_LVT0(NULL);
+
+ printk(" done.\n");
+}
+
+/*
+ * This looks a bit hackish, but it's about the only way of sending
+ * a few INTA cycles to 8259As and any associated glue logic. ICR does
+ * not support the ExtINT mode, unfortunately. We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+ int pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+ unsigned long flags;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ if (pin == -1)
+ return;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+ *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ clear_IO_APIC_pin(0, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+
+ entry1.dest_mode = 0; /* physical delivery */
+ entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest.physical.physical_dest = hard_smp_processor_id();
+ entry1.delivery_mode = dest_ExtINT;
+ entry1.polarity = entry0.polarity;
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
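+	/*
+	 * Program the RTC for a 1024Hz periodic interrupt (rate select 6)
+	 * and enable it (RTC_PIE), so that IRQ8 fires and generates the
+	 * INTA cycles we are after.
+	 */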
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+ RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+ i = 100;
+ while (i-- > 0) {
+ mdelay(10);
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+ i -= 10;
+ }
+
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(0, pin);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+static inline void check_timer(void)
+{
+ extern int timer_ack;
+ int pin1, pin2;
+ int vector;
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+ vector = assign_irq_vector(0);
+ set_intr_gate(vector, interrupt[0]);
+
+ /*
+ * Subtle, code in do_timer_interrupt() expects an AEOI
+ * mode for the 8259A whenever interrupts are routed
+ * through I/O APICs. Also IRQ0 has to be enabled in
+ * the 8259A which implies the virtual wire has to be
+ * disabled in the local APIC.
+ */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ init_8259A(1);
+ timer_ack = 1;
+ enable_8259A_irq(0);
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ pin2 = find_isa_irq_pin(0, mp_ExtINT);
+
+ printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+
+ if (pin1 != -1) {
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ unmask_IO_APIC_irq(0);
+ if (timer_irq_works()) {
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ setup_nmi();
+ enable_8259A_irq(0);
+ // XXX Xen check_nmi_watchdog();
+ }
+ return;
+ }
+ clear_IO_APIC_pin(0, pin1);
+ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+ }
+
+ printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+ if (pin2 != -1) {
+ printk("\n..... (found pin %d) ...", pin2);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ setup_ExtINT_IRQ0_pin(pin2, vector);
+ if (timer_irq_works()) {
+ printk("works.\n");
+ if (pin1 != -1)
+ replace_pin_at_irq(0, 0, pin1, 0, pin2);
+ else
+ add_pin_to_irq(0, 0, pin2);
+ if (nmi_watchdog == NMI_IO_APIC) {
+ setup_nmi();
+ // XXX Xen check_nmi_watchdog();
+ }
+ return;
+ }
+ /*
+ * Cleanup, just in case ...
+ */
+ clear_IO_APIC_pin(0, pin2);
+ }
+ printk(" failed.\n");
+
+ if (nmi_watchdog) {
+ printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+ nmi_watchdog = 0;
+ }
+
+ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+ disable_8259A_irq(0);
+ irq_desc[0].handler = &lapic_irq_type;
+ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ enable_8259A_irq(0);
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+ printk(" failed.\n");
+
+ printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+ init_8259A(0);
+ make_8259A_irq(0);
+ apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+ unlock_ExtINT_logic();
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ printk(" failed :(.\n");
+ panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
+}
+
+/*
+ * IRQs that are handled by the old PIC in all cases:
+ * - IRQ2 is the cascade IRQ, and cannot be an IO-APIC IRQ.
+ *   Linux doesn't really care, as it's not actually used
+ *   for any interrupt handling anyway.
+ * - There used to be IRQ13 here as well, but all
+ *   MPS-compliant systems must not use it for FPU coupling,
+ *   and we want to use exception 16 anyway. And there are
+ *   systems that connect it to an I/O APIC for other uses.
+ *   Thus we don't mark it special any longer.
+ *
+ * Additionally, something is definitely wrong with irq9
+ * on PIIX4 boards.
+ */
+#define PIC_IRQS (1<<2)
+
+void __init setup_IO_APIC(void)
+{
+ enable_IO_APIC();
+
+ io_apic_irqs = ~PIC_IRQS;
+ printk("ENABLING IO-APIC IRQs\n");
+
+ /*
+ * Set up the IO-APIC IRQ routing table by parsing the MP-BIOS
+ * mptable:
+ */
+ setup_ioapic_ids_from_mpc();
+ sync_Arb_IDs();
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ check_timer();
+ print_IO_APIC();
+}
diff --git a/xen/arch/i386/ioremap.c b/xen/arch/i386/ioremap.c
new file mode 100644
index 0000000000..4ed7ba438d
--- /dev/null
+++ b/xen/arch/i386/ioremap.c
@@ -0,0 +1,107 @@
+/*
+ * arch/i386/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+//#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+
+static unsigned long remap_base = 0;
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY)
+
+#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+static void new_l2e(l2_pgentry_t *pl2e)
+{
+ l1_pgentry_t *pl1e = (l1_pgentry_t *)get_free_page(GFP_KERNEL);
+ if ( !pl1e ) BUG();
+ clear_page(pl1e);
+ *pl2e = mk_l2_pgentry(__pa(pl1e)|L2_PROT);
+}
+
+
+void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+ unsigned long vaddr;
+ unsigned long offset, cur=0, last_addr;
+ l2_pgentry_t *pl2e;
+ l1_pgentry_t *pl1e;
+
+ /* First time through, start allocating from far end of virtual memory. */
+ if ( !remap_base ) remap_base = IOREMAP_VIRT_START;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
+ /*
+ * Don't remap the low PCI/ISA area, it's always mapped..
+ */
+ if (phys_addr >= 0xA0000 && last_addr < 0x100000)
+ return phys_to_virt(phys_addr);
+
+#if 0
+ /*
+ * Don't allow anybody to remap normal RAM that we're using..
+ */
+ if (phys_addr < virt_to_phys(high_memory)) {
+ char *t_addr, *t_end;
+ struct pfn_info *page;
+
+ t_addr = __va(phys_addr);
+ t_end = t_addr + (size - 1);
+
+ for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
+ if(!PageReserved(page))
+ return NULL;
+ }
+#endif
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr) - phys_addr;
+
+ /*
+ * Ok, go for it..
+ */
+ vaddr = remap_base;
+ remap_base += size;
+ pl2e = idle0_pg_table + l2_table_offset(vaddr);
+ if ( l2_pgentry_empty(*pl2e) ) new_l2e(pl2e);
+ pl1e = l2_pgentry_to_l1(*pl2e++) + l1_table_offset(vaddr);
+ for ( ; ; )
+ {
+ if ( !l1_pgentry_empty(*pl1e) ) BUG();
+ *pl1e++ = mk_l1_pgentry((phys_addr+cur)|L1_PROT|flags);
+ cur += PAGE_SIZE;
+ if ( cur == size ) break;
+ if ( !((unsigned long)pl1e & (PAGE_SIZE-1)) )
+ {
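+        /* pl1e has crossed a page boundary: one L1 table is full, so move on to the next L2 entry's L1 table. */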
+ if ( l2_pgentry_empty(*pl2e) ) new_l2e(pl2e);
+ pl1e = l2_pgentry_to_l1(*pl2e++);
+ }
+ }
+
+ flush_tlb_all();
+
+ return (void *) (offset + (char *)vaddr);
+}
+
+void iounmap(void *addr)
+{
+ /* NOP for now. */
+}
diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c
new file mode 100644
index 0000000000..e58fb8f2ad
--- /dev/null
+++ b/xen/arch/i386/irq.c
@@ -0,0 +1,904 @@
+/*
+ * linux/arch/i386/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setup_irqs with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+#include <xeno/slab.h>
+
+#include <asm/msr.h>
+#include <asm/hardirq.h>
+#include <asm/ptrace.h>
+#include <asm/atomic.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/pgalloc.h>
+#include <xeno/delay.h>
+
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * Every controller has a 'controller-template' that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * The code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
+{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+/*
+ * Special irq handlers.
+ */
+
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+/*
+ * What should we do if we get a hw irq event on an illegal vector?
+ * Each architecture has to answer this itself; it doesn't deserve
+ * a generic callback, I think.
+ */
+#if CONFIG_X86
+ printk("unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ */
+ ack_APIC_irq();
+#endif
+#endif
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none disable_none
+#define end_none enable_none
+
+struct hw_interrupt_type no_irq_type = {
+ "none",
+ startup_none,
+ shutdown_none,
+ enable_none,
+ disable_none,
+ ack_none,
+ end_none
+};
+
+atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+/*
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ */
+
+#ifdef CONFIG_SMP
+unsigned char global_irq_holder = 0xff;
+unsigned volatile long global_irq_lock; /* pedantic: long for set_bit --RR */
+
+#define MAXCOUNT 100000000
+
+/*
+ * I had a lockup scenario where a tight loop doing
+ * spin_unlock()/spin_lock() on CPU#1 was racing with
+ * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
+ * apparently the spin_unlock() information did not make it
+ * through to CPU#0 ... nasty, is this by design, do we have to limit
+ * 'memory update oscillation frequency' artificially like here?
+ *
+ * Such 'high frequency update' races can be avoided by careful design, but
+ * some of our major constructs like spinlocks use similar techniques,
+ * it would be nice to clarify this issue. Set this define to 0 if you
+ * want to check whether your system freezes. I suspect the delay done
+ * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but
+ * I thought that such things are guaranteed by design, since we use
+ * the 'LOCK' prefix.
+ */
+#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0
+
+#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
+# define SYNC_OTHER_CORES(x) udelay(x+1)
+#else
+/*
+ * We have to allow irqs to arrive between __sti and __cli
+ */
+# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
+#endif
+
+static inline void wait_on_irq(int cpu)
+{
+ for (;;) {
+
+ /*
+ * Wait until all interrupts are gone. Wait
+ * for bottom half handlers unless we're
+ * already executing in one..
+ */
+ if (!irqs_running())
+ if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
+ break;
+
+ /* Duh, we have to loop. Release the lock to avoid deadlocks */
+ clear_bit(0,&global_irq_lock);
+
+ for (;;) {
+ __sti();
+ SYNC_OTHER_CORES(cpu);
+ __cli();
+ if (irqs_running())
+ continue;
+ if (global_irq_lock)
+ continue;
+ if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
+ continue;
+ if (!test_and_set_bit(0,&global_irq_lock))
+ break;
+ }
+ }
+}
+
+/*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+void synchronize_irq(void)
+{
+ if (irqs_running()) {
+ /* Stupid approach */
+ cli();
+ sti();
+ }
+}
+
+static inline void get_irqlock(int cpu)
+{
+ if (test_and_set_bit(0,&global_irq_lock)) {
+ /* do we already hold the lock? */
+ if ((unsigned char) cpu == global_irq_holder)
+ return;
+ /* Uhhuh.. Somebody else got it. Wait.. */
+ do {
+ do {
+ rep_nop();
+ } while (test_bit(0,&global_irq_lock));
+ } while (test_and_set_bit(0,&global_irq_lock));
+ }
+ /*
+	 * We also have to make sure that nobody else is running
+ * in an interrupt context.
+ */
+ wait_on_irq(cpu);
+
+ /*
+ * Ok, finally..
+ */
+ global_irq_holder = cpu;
+}
+
+#define EFLAGS_IF_SHIFT 9
+
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+void __global_cli(void)
+{
+ unsigned int flags;
+
+ __save_flags(flags);
+ if (flags & (1 << EFLAGS_IF_SHIFT)) {
+ int cpu = smp_processor_id();
+ __cli();
+ if (!local_irq_count(cpu))
+ get_irqlock(cpu);
+ }
+}
+
+void __global_sti(void)
+{
+ int cpu = smp_processor_id();
+
+ if (!local_irq_count(cpu))
+ release_irqlock(cpu);
+ __sti();
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long __global_save_flags(void)
+{
+ int retval;
+ int local_enabled;
+ unsigned long flags;
+ int cpu = smp_processor_id();
+
+ __save_flags(flags);
+ local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1;
+ /* default to local */
+ retval = 2 + local_enabled;
+
+ /* check for global flags if we're not in an interrupt */
+ if (!local_irq_count(cpu)) {
+ if (local_enabled)
+ retval = 1;
+ if (global_irq_holder == cpu)
+ retval = 0;
+ }
+ return retval;
+}
+
+void __global_restore_flags(unsigned long flags)
+{
+ switch (flags) {
+ case 0:
+ __global_cli();
+ break;
+ case 1:
+ __global_sti();
+ break;
+ case 2:
+ __cli();
+ break;
+ case 3:
+ __sti();
+ break;
+ default:
+ printk("global_restore_flags: %08lx (%08lx)\n",
+ flags, (&flags)[-1]);
+ }
+}
+
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
+{
+ int status;
+ int cpu = smp_processor_id();
+
+ irq_enter(cpu, irq);
+
+ status = 1; /* Force the "do bottom halves" bit */
+
+ if (!(action->flags & SA_INTERRUPT))
+ __sti();
+
+ do {
+ status |= action->flags;
+ action->handler(irq, action->dev_id, regs);
+ action = action->next;
+ } while (action);
+
+ __cli();
+
+ irq_exit(cpu, irq);
+
+ return status;
+}
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock.
+ */
+
+/**
+ * disable_irq_nosync - disable an irq without waiting
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Disables and Enables are
+ * nested.
+ * Unlike disable_irq(), this function does not ensure existing
+ * instances of the IRQ handler have completed before returning.
+ *
+ * This function may be called from IRQ context.
+ */
+
+inline void disable_irq_nosync(unsigned int irq)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->depth++) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->disable(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ * disable_irq - disable an irq and wait for completion
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Enables and Disables are
+ * nested.
+ * This function waits for any pending IRQ handlers for this interrupt
+ * to complete before returning. If you use this function while
+ *	holding a resource the IRQ handler may need, you will deadlock.
+ *
+ * This function may be called - with care - from IRQ context.
+ */
+
+void disable_irq(unsigned int irq)
+{
+ disable_irq_nosync(irq);
+
+ if (!local_irq_count(smp_processor_id())) {
+ do {
+ barrier();
+ cpu_relax();
+ } while (irq_desc[irq].status & IRQ_INPROGRESS);
+ }
+}
+
+/**
+ * enable_irq - enable handling of an irq
+ * @irq: Interrupt to enable
+ *
+ * Undoes the effect of one call to disable_irq(). If this
+ * matches the last disable, processing of interrupts on this
+ * IRQ line is re-enabled.
+ *
+ * This function may be called from IRQ context.
+ */
+
+void enable_irq(unsigned int irq)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ switch (desc->depth) {
+ case 1: {
+ unsigned int status = desc->status & ~IRQ_DISABLED;
+ desc->status = status;
+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ desc->status = status | IRQ_REPLAY;
+ hw_resend_irq(desc->handler,irq);
+ }
+ desc->handler->enable(irq);
+ /* fall-through */
+ }
+ default:
+ desc->depth--;
+ break;
+ case 0:
+ printk("enable_irq(%u) unbalanced from %p\n", irq,
+ __builtin_return_address(0));
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+asmlinkage unsigned int do_IRQ(struct pt_regs regs)
+{
+ /*
+ * We ack quickly, we don't want the irq controller
+ * thinking we're snobs just because some other CPU has
+ * disabled global interrupts (we have already done the
+ * INT_ACK cycles, it's too late to try to pretend to the
+ * controller that we aren't taking the interrupt).
+ *
+ * 0 return value means that this irq is already being
+ * handled by some other CPU. (or is disabled)
+ */
+ int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */
+ int cpu = smp_processor_id();
+ irq_desc_t *desc = irq_desc + irq;
+ struct irqaction * action;
+ unsigned int status;
+
+ spin_lock(&desc->lock);
+ desc->handler->ack(irq);
+ /*
+	 * REPLAY is when Linux resends an IRQ that was dropped earlier.
+	 * WAITING is used by probe to mark irqs that are being tested.
+	 */
+ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+ status |= IRQ_PENDING; /* we _want_ to handle it */
+
+ /*
+ * If the IRQ is disabled for whatever reason, we cannot
+ * use the action we have.
+ */
+ action = NULL;
+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+ action = desc->action;
+ status &= ~IRQ_PENDING; /* we commit to handling */
+ status |= IRQ_INPROGRESS; /* we are handling it */
+ }
+ desc->status = status;
+
+ /*
+ * If there is no IRQ handler or it was disabled, exit early.
+	 * Since we set PENDING, if another processor is handling
+	 * a different instance of this same irq, the other processor
+	 * will take care of it.
+ */
+ if (!action)
+ goto out;
+
+ /*
+ * Edge triggered interrupts need to remember
+ * pending events.
+ * This applies to any hw interrupts that allow a second
+ * instance of the same irq to arrive while we are in do_IRQ
+ * or in the handler. But the code here only handles the _second_
+ * instance of the irq, not the third or fourth. So it is mostly
+ * useful for irq hardware that does not mask cleanly in an
+ * SMP environment.
+ */
+ for (;;) {
+ spin_unlock(&desc->lock);
+ handle_IRQ_event(irq, &regs, action);
+ spin_lock(&desc->lock);
+
+ if (!(desc->status & IRQ_PENDING))
+ break;
+ desc->status &= ~IRQ_PENDING;
+ }
+ desc->status &= ~IRQ_INPROGRESS;
+ out:
+ /*
+ * The ->end() handler has to deal with interrupts which got
+ * disabled while the handler was running.
+ */
+ desc->handler->end(irq);
+ spin_unlock(&desc->lock);
+
+ if (softirq_pending(cpu))
+ do_softirq();
+
+ return 1;
+}
+
+/**
+ * request_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs
+ * @irqflags: Interrupt type flags
+ * @devname: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the
+ * interrupt line and IRQ handling. From the point this
+ * call is made your handler function may be invoked. Since
+ * your handler function must clear any interrupt the board
+ * raises, you must take care both to initialise your hardware
+ * and to set up the interrupt handler in the right order.
+ *
+ * Dev_id must be globally unique. Normally the address of the
+ * device data structure is used as the cookie. Since the handler
+ * receives this value it makes sense to use it.
+ *
+ * If your interrupt is shared you must pass a non NULL dev_id
+ * as this is required when freeing the interrupt.
+ *
+ * Flags:
+ *
+ * SA_SHIRQ Interrupt is shared
+ *
+ * SA_INTERRUPT Disable local interrupts while processing
+ */
+
+int request_irq(unsigned int irq,
+ void (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char * devname,
+ void *dev_id)
+{
+ int retval;
+ struct irqaction * action;
+
+ if (irq >= NR_IRQS)
+ return -EINVAL;
+ if (!handler)
+ return -EINVAL;
+
+ action = (struct irqaction *)
+ kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+ action->flags = irqflags;
+ action->mask = 0;
+ action->name = devname;
+ action->next = NULL;
+ action->dev_id = dev_id;
+
+ retval = setup_irq(irq, action);
+ if (retval)
+ kfree(action);
+
+ return retval;
+}
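+
+/*
+ * A typical call, as a sketch (the handler and cookie names here are
+ * illustrative, not defined anywhere in this file):
+ *
+ *	if (request_irq(dev->irq, mydev_interrupt, SA_SHIRQ, "mydev", dev))
+ *		return -EBUSY;
+ *	...
+ *	free_irq(dev->irq, dev);
+ */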
+
+/**
+ * free_irq - free an interrupt
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
+ *
+ * Remove an interrupt handler. The handler is removed and if the
+ * interrupt line is no longer in use by any driver it is disabled.
+ * On a shared IRQ the caller must ensure the interrupt is disabled
+ * on the card it drives before calling this function. The function
+ * does not return until any executing interrupts for this IRQ
+ * have completed.
+ *
+ * This function may be called from interrupt context.
+ *
+ * Bugs: Attempting to free an irq in a handler for the same irq hangs
+ * the machine.
+ */
+
+void free_irq(unsigned int irq, void *dev_id)
+{
+ irq_desc_t *desc;
+ struct irqaction **p;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS)
+ return;
+
+ desc = irq_desc + irq;
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+ for (;;) {
+ struct irqaction * action = *p;
+ if (action) {
+ struct irqaction **pp = p;
+ p = &action->next;
+ if (action->dev_id != dev_id)
+ continue;
+
+ /* Found it - now remove it from the list of entries */
+ *pp = action->next;
+ if (!desc->action) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->shutdown(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifdef CONFIG_SMP
+ /* Wait to make sure it's not being used on another CPU */
+ while (desc->status & IRQ_INPROGRESS) {
+ barrier();
+ cpu_relax();
+ }
+#endif
+ kfree(action);
+ return;
+ }
+ printk("Trying to free free IRQ%d\n",irq);
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return;
+ }
+}
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static spinlock_t probe_sem = SPIN_LOCK_UNLOCKED;
+
+/**
+ * probe_irq_on - begin an interrupt autodetect
+ *
+ * Commence probing for an interrupt. The interrupts are scanned
+ * and a mask of potential interrupt lines is returned.
+ *
+ */
+
+unsigned long probe_irq_on(void)
+{
+ unsigned int i;
+ irq_desc_t *desc;
+ unsigned long val;
+ unsigned long s=0, e=0;
+
+ spin_lock(&probe_sem);
+ /*
+ * something may have generated an irq long ago and we want to
+ * flush such a longstanding irq before considering it as spurious.
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!irq_desc[i].action)
+ irq_desc[i].handler->startup(i);
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /* Wait for longstanding interrupts to trigger (20ms delay). */
+ rdtscl(s);
+ do {
+ synchronize_irq();
+ rdtscl(e);
+ } while ( ((e-s)/ticks_per_usec) < 20000 );
+
+ /*
+ * enable any unassigned irqs
+ * (we must startup again here because if a longstanding irq
+ * happened in the previous stage, it may have masked itself)
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!desc->action) {
+ desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+ if (desc->handler->startup(i))
+ desc->status |= IRQ_PENDING;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /*
+ * Wait for spurious interrupts to trigger (100ms delay).
+ */
+ rdtscl(s);
+ do {
+ synchronize_irq();
+ rdtscl(e);
+ } while ( ((e-s)/ticks_per_usec) < 100000 );
+
+ /*
+ * Now filter out any obviously spurious interrupts
+ */
+ val = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ /* It triggered already - consider it spurious. */
+ if (!(status & IRQ_WAITING)) {
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ } else
+ if (i < 32)
+ val |= 1 << i;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ return val;
+}
+
+/*
+ * Return a mask of triggered interrupts (this
+ * can handle only legacy ISA interrupts).
+ */
+
+/**
+ * probe_irq_mask - scan a bitmap of interrupt lines
+ * @val: mask of interrupts to consider
+ *
+ * Scan the ISA bus interrupt lines and return a bitmap of
+ * active interrupts. The interrupt probe logic state is then
+ * returned to its previous value.
+ *
+ * Note: we need to scan all the irq's even though we will
+ * only return ISA irq numbers - just so that we reset them
+ * all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+ int i;
+ unsigned int mask;
+
+ mask = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (i < 16 && !(status & IRQ_WAITING))
+ mask |= 1 << i;
+
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ spin_unlock(&probe_sem);
+
+ return mask & val;
+}
+
+/*
+ * Return the one interrupt that triggered (this can
+ * handle any interrupt source).
+ */
+
+/**
+ * probe_irq_off - end an interrupt autodetect
+ * @val: mask of potential interrupts (unused)
+ *
+ * Scans the unused interrupt lines and returns the line which
+ * appears to have triggered the interrupt. If no interrupt was
+ * found then zero is returned. If more than one interrupt is
+ * found then minus the first candidate is returned to indicate
+ *	there is doubt.
+ *
+ * The interrupt probe logic state is returned to its previous
+ * value.
+ *
+ *	BUGS: When used in a module (which arguably shouldn't happen)
+ * nothing prevents two IRQ probe callers from overlapping. The
+ * results of this are non-optimal.
+ */
+
+int probe_irq_off(unsigned long val)
+{
+ int i, irq_found, nr_irqs;
+
+ nr_irqs = 0;
+ irq_found = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING)) {
+ if (!nr_irqs)
+ irq_found = i;
+ nr_irqs++;
+ }
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ spin_unlock(&probe_sem);
+
+ if (nr_irqs > 1)
+ irq_found = -irq_found;
+ return irq_found;
+}
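+
+/*
+ * Illustrative usage sketch (not part of this file): a driver autoprobe
+ * typically brackets a device-triggered interrupt between probe_irq_on()
+ * and probe_irq_off(); the device-poke helper named below is hypothetical.
+ *
+ *   int irq;
+ *   unsigned long mask = probe_irq_on();
+ *
+ *   mydev_trigger_interrupt();           (hypothetical device poke)
+ *   irq = probe_irq_off(mask);
+ *   if (irq > 0)
+ *           printk("device found on IRQ %d\n", irq);
+ */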
+
+/* this was setup_x86_irq but it seems pretty generic */
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+ int shared = 0;
+ unsigned long flags;
+ struct irqaction *old, **p;
+ irq_desc_t *desc = irq_desc + irq;
+
+ /*
+ * The following block of code has to be executed atomically
+ */
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+ if ((old = *p) != NULL) {
+ /* Can't share interrupts unless both agree to */
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return -EBUSY;
+ }
+
+ /* add new interrupt at end of irq queue */
+ do {
+ p = &old->next;
+ old = *p;
+ } while (old);
+ shared = 1;
+ }
+
+ *p = new;
+
+ if (!shared) {
+ desc->depth = 0;
+ desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
+ desc->handler->startup(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+ return 0;
+}
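+
+/*
+ * Illustrative sketch (not part of this file): low-level system interrupts
+ * register a handler via setup_irq() with a statically allocated irqaction;
+ * the handler name and MY_IRQ below are hypothetical.
+ *
+ *   static struct irqaction my_action =
+ *       { my_handler, SA_INTERRUPT, 0, "mydev", NULL, NULL };
+ *
+ *   setup_irq(MY_IRQ, &my_action);
+ */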
diff --git a/xen/arch/i386/mm.c b/xen/arch/i386/mm.c
new file mode 100644
index 0000000000..2d4d8ddf52
--- /dev/null
+++ b/xen/arch/i386/mm.c
@@ -0,0 +1,141 @@
+#include <xeno/config.h>
+#include <xeno/lib.h>
+#include <xeno/init.h>
+#include <xeno/mm.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+
+static inline void set_pte_phys (unsigned long vaddr,
+ l1_pgentry_t entry)
+{
+ l2_pgentry_t *l2ent;
+ l1_pgentry_t *l1ent;
+
+ l2ent = idle0_pg_table + l2_table_offset(vaddr);
+ l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
+ *l1ent = entry;
+
+ /* It's enough to flush this one mapping. */
+ __flush_tlb_one(vaddr);
+}
+
+void __set_fixmap (enum fixed_addresses idx,
+ l1_pgentry_t entry)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ printk("Invalid __set_fixmap\n");
+ return;
+ }
+ set_pte_phys(address, entry);
+}
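+
+/*
+ * Illustrative sketch (not part of this file): a caller would install a
+ * fixed-address mapping like this, assuming an mk_l1_pgentry() constructor
+ * analogous to the mk_l2_pgentry() used elsewhere in this tree; FIX_EXAMPLE
+ * stands in for a real enum fixed_addresses value.
+ *
+ *   __set_fixmap(FIX_EXAMPLE, mk_l1_pgentry(phys_addr | PAGE_HYPERVISOR));
+ */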
+
+static void __init fixrange_init (unsigned long start,
+ unsigned long end, l2_pgentry_t *pg_base)
+{
+ l2_pgentry_t *l2e;
+ int i;
+ unsigned long vaddr, page;
+
+ vaddr = start;
+ i = l2_table_offset(vaddr);
+ l2e = pg_base + i;
+
+ for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ )
+ {
+ if ( !l2_pgentry_empty(*l2e) ) continue;
+ page = (unsigned long)get_free_page(GFP_KERNEL);
+ clear_page(page);
+ *l2e = mk_l2_pgentry(__pa(page) | PAGE_HYPERVISOR);
+ vaddr += 1 << L2_PAGETABLE_SHIFT;
+ }
+}
+
+void __init paging_init(void)
+{
+ unsigned long addr;
+ void *ioremap_pt;
+
+ /* XXX initialised in boot.S */
+ /*if ( cpu_has_pge ) set_in_cr4(X86_CR4_PGE);*/
+ /*if ( cpu_has_pse ) set_in_cr4(X86_CR4_PSE);*/
+ /*if ( cpu_has_pae ) set_in_cr4(X86_CR4_PAE);*/
+
+ /*
+ * Fixed mappings, only the page table structure has to be
+ * created - mappings will be set by set_fixmap():
+ */
+ addr = FIXADDR_START & ~((1<<L2_PAGETABLE_SHIFT)-1);
+ fixrange_init(addr, 0, idle0_pg_table);
+
+ /* Create page table for ioremap(). */
+ ioremap_pt = (void *)get_free_page(GFP_KERNEL);
+ clear_page(ioremap_pt);
+ idle0_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(ioremap_pt) | PAGE_HYPERVISOR);
+
+ /* Create read-only mapping of MPT for guest-OS use. */
+ idle0_pg_table[READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ idle0_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT];
+ mk_l2_readonly(idle0_pg_table +
+ (READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT));
+}
+
+void __init zap_low_mappings (void)
+{
+ int i, j;
+ for ( i = 0; i < smp_num_cpus; i++ )
+ {
+ for ( j = 0; j < DOMAIN_ENTRIES_PER_L2_PAGETABLE; j++ )
+ {
+ idle_pg_table[i][j] = mk_l2_pgentry(0);
+ }
+ }
+ flush_tlb_all();
+}
+
+
+long do_stack_and_ldt_switch(
+ unsigned long ss, unsigned long esp, unsigned long ldts)
+{
+ int nr = smp_processor_id();
+ struct tss_struct *t = &init_tss[nr];
+
+ if ( (ss == __HYPERVISOR_CS) || (ss == __HYPERVISOR_DS) )
+ return -1;
+
+ if ( ldts != current->mm.ldt_sel )
+ {
+ unsigned long *ptabent;
+ ptabent = (unsigned long *)GET_GDT_ADDRESS(current);
+ /* Out of range for GDT table? */
+ if ( (ldts * 8) > GET_GDT_ENTRIES(current) ) return -1;
+ ptabent += ldts * 2; /* 8 bytes per desc == 2 * unsigned long */
+ /*
+ * Not an LDT entry? Bits 8-11 of the descriptor's high word hold
+ * the type (0010b for an LDT) and bit 12 is the S flag (0 for a
+ * system descriptor), so masking with 0x00001f00 must yield
+ * exactly 0x00000200.
+ */
+ if ( (*ptabent & 0x00001f00) != 0x00000200 ) return -1;
+ current->mm.ldt_sel = ldts;
+ __load_LDT(ldts);
+ }
+
+ current->thread.ss1 = ss;
+ current->thread.esp1 = esp;
+ t->ss1 = ss;
+ t->esp1 = esp;
+
+ return 0;
+}
+
+
+long do_set_gdt(unsigned long *frame_list, int entries)
+{
+ return -ENOSYS;
+}
+
+
+long do_update_descriptor(
+ unsigned long pa, unsigned long word1, unsigned long word2)
+{
+ return -ENOSYS;
+}
diff --git a/xen/arch/i386/mpparse.c b/xen/arch/i386/mpparse.c
new file mode 100644
index 0000000000..0e2ca870a9
--- /dev/null
+++ b/xen/arch/i386/mpparse.c
@@ -0,0 +1,944 @@
+/*
+ * Intel Multiprocessor Specification 1.1 and 1.4
+ * compliant MP-table parsing routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Alan Cox : Added EBDA scanning
+ * Ingo Molnar : various cleanups and rewrites
+ * Maciej W. Rozycki : Bits for default MP configurations
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <asm/io.h>
+#include <xeno/irq.h>
+#include <xeno/smp.h>
+#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
+#include <asm/smpboot.h>
+#include <xeno/kernel.h>
+
+int numnodes = 1; /* XXX Xen */
+
+/* Have we found an MP table */
+int smp_found_config;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_current_pci_id;
+int *mp_bus_id_to_type;
+int *mp_bus_id_to_node;
+int *mp_bus_id_to_local;
+int *mp_bus_id_to_pci_bus;
+int max_mp_busses;
+int max_irq_sources;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc *mp_irqs;
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_logical_apicid = -1U;
+/* Internal processor count */
+static unsigned int num_processors;
+
+/* Bitmask of physically existing CPUs */
+unsigned long phys_cpu_present_map;
+unsigned long logical_cpu_present_map;
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+unsigned char esr_disable = 0;
+unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE;
+unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC;
+#endif
+unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+#ifndef CONFIG_X86_VISWS_APIC
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+ int sum = 0;
+
+ while (len--)
+ sum += *mp++;
+
+ return sum & 0xFF;
+}
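+
+/*
+ * Note: MP structures carry a checksum byte chosen so that all bytes of
+ * the structure sum to zero modulo 256, so a non-zero return here means
+ * a corrupt table. E.g. validating a 16-byte floating pointer structure:
+ *
+ *   if (mpf_checksum((unsigned char *)mpf, 16))
+ *           printk("bad MP floating pointer checksum\n");
+ */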
+
+/*
+ * Processor encoding in an MP configuration block
+ */
+
+static char __init *mpc_family(int family,int model)
+{
+ static char n[32];
+ static char *model_defs[]=
+ {
+ "80486DX","80486DX",
+ "80486SX","80486DX/2 or 80487",
+ "80486SL","80486SX/2",
+ "Unknown","80486DX/2-WB",
+ "80486DX/4","80486DX/4-WB"
+ };
+
+ switch (family) {
+ case 0x04:
+ if (model < 10)
+ return model_defs[model];
+ break;
+
+ case 0x05:
+ return("Pentium(tm)");
+
+ case 0x06:
+ return("Pentium(tm) Pro");
+
+ case 0x0F:
+ if (model == 0x00)
+ return("Pentium 4(tm)");
+ if (model == 0x02)
+ return("Pentium 4(tm) XEON(tm)");
+ if (model == 0x0F)
+ return("Special controller");
+ }
+ sprintf(n,"Unknown CPU [%d:%d]",family, model);
+ return n;
+}
+
+#ifdef CONFIG_X86_IO_APIC
+// XXX Xen extern int have_acpi_tables; /* set by acpitable.c */
+#define have_acpi_tables (0)
+#else
+#define have_acpi_tables (0)
+#endif
+
+/*
+ * Translation table entries have to be matched to main table entries
+ * by counter, hence the mpc_record variable - there doesn't seem to be
+ * a less disgusting way of doing this.
+ */
+
+static int mpc_record;
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
+
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ int ver, quad, logical_apicid;
+
+ if (!(m->mpc_cpuflag & CPU_ENABLED))
+ return;
+
+ logical_apicid = m->mpc_apicid;
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ quad = translation_table[mpc_record]->trans_quad;
+ logical_apicid = (quad << 4) +
+ (m->mpc_apicid ? m->mpc_apicid << 1 : 1);
+ printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n",
+ m->mpc_apicid,
+ mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+ (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+ m->mpc_apicver, quad, logical_apicid);
+ } else {
+ printk("Processor #%d %s APIC version %d\n",
+ m->mpc_apicid,
+ mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+ (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+ m->mpc_apicver);
+ }
+
+ if (m->mpc_featureflag&(1<<0))
+ Dprintk(" Floating point unit present.\n");
+ if (m->mpc_featureflag&(1<<7))
+ Dprintk(" Machine Exception supported.\n");
+ if (m->mpc_featureflag&(1<<8))
+ Dprintk(" 64 bit compare & exchange supported.\n");
+ if (m->mpc_featureflag&(1<<9))
+ Dprintk(" Internal APIC present.\n");
+ if (m->mpc_featureflag&(1<<11))
+ Dprintk(" SEP present.\n");
+ if (m->mpc_featureflag&(1<<12))
+ Dprintk(" MTRR present.\n");
+ if (m->mpc_featureflag&(1<<13))
+ Dprintk(" PGE present.\n");
+ if (m->mpc_featureflag&(1<<14))
+ Dprintk(" MCA present.\n");
+ if (m->mpc_featureflag&(1<<15))
+ Dprintk(" CMOV present.\n");
+ if (m->mpc_featureflag&(1<<16))
+ Dprintk(" PAT present.\n");
+ if (m->mpc_featureflag&(1<<17))
+ Dprintk(" PSE present.\n");
+ if (m->mpc_featureflag&(1<<18))
+ Dprintk(" PSN present.\n");
+ if (m->mpc_featureflag&(1<<19))
+ Dprintk(" Cache Line Flush Instruction present.\n");
+ /* 20 Reserved */
+ if (m->mpc_featureflag&(1<<21))
+ Dprintk(" Debug Trace and EMON Store present.\n");
+ if (m->mpc_featureflag&(1<<22))
+ Dprintk(" ACPI Thermal Throttle Registers present.\n");
+ if (m->mpc_featureflag&(1<<23))
+ Dprintk(" MMX present.\n");
+ if (m->mpc_featureflag&(1<<24))
+ Dprintk(" FXSR present.\n");
+ if (m->mpc_featureflag&(1<<25))
+ Dprintk(" XMM present.\n");
+ if (m->mpc_featureflag&(1<<26))
+ Dprintk(" Willamette New Instructions present.\n");
+ if (m->mpc_featureflag&(1<<27))
+ Dprintk(" Self Snoop present.\n");
+ if (m->mpc_featureflag&(1<<28))
+ Dprintk(" HT present.\n");
+ if (m->mpc_featureflag&(1<<29))
+ Dprintk(" Thermal Monitor present.\n");
+ /* 30, 31 Reserved */
+
+
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ Dprintk(" Bootup CPU\n");
+ boot_cpu_physical_apicid = m->mpc_apicid;
+ boot_cpu_logical_apicid = logical_apicid;
+ }
+
+ num_processors++;
+
+ if (m->mpc_apicid > MAX_APICS) {
+ printk("Processor #%d INVALID. (Max ID: %d).\n",
+ m->mpc_apicid, MAX_APICS);
+ --num_processors;
+ return;
+ }
+ ver = m->mpc_apicver;
+
+ logical_cpu_present_map |= 1 << (num_processors-1);
+ phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid);
+
+ /*
+ * Validate version
+ */
+ if (ver == 0x0) {
+ printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+ ver = 0x10;
+ }
+ apic_version[m->mpc_apicid] = ver;
+ raw_phys_apicid[num_processors - 1] = m->mpc_apicid;
+}
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+ char str[7];
+ int quad;
+
+ memcpy(str, m->mpc_bustype, 6);
+ str[6] = 0;
+
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ quad = translation_table[mpc_record]->trans_quad;
+ mp_bus_id_to_node[m->mpc_busid] = quad;
+ mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local;
+ quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid;
+ printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad);
+ } else {
+ Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+ }
+
+ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+ mp_current_pci_id++;
+ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+ } else {
+ printk("Unknown bustype %s - ignoring\n", str);
+ }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+ if (!(m->mpc_flags & MPC_APIC_USABLE))
+ return;
+
+ printk("I/O APIC #%d Version %d at 0x%lX.\n",
+ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
+ MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+ }
+ if (!m->mpc_apicaddr) {
+ printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+ " found in MP table, skipping!\n");
+ return;
+ }
+ mp_ioapics[nr_ioapics] = *m;
+ nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+ mp_irqs [mp_irq_entries] = *m;
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+ if (++mp_irq_entries == max_irq_sources)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+ Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+ /*
+ * Well it seems all SMP boards in existence
+ * use ExtINT/LVT1 == LINT0 and
+ * NMI/LVT2 == LINT1 - the following check
+ * will show us if this assumption is false.
+ * Until then we do not have to add baggage.
+ */
+ if ((m->mpc_irqtype == mp_ExtINT) &&
+ (m->mpc_destapiclint != 0))
+ BUG();
+ if ((m->mpc_irqtype == mp_NMI) &&
+ (m->mpc_destapiclint != 1))
+ BUG();
+}
+
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+ printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+
+ if (mpc_record >= MAX_MPC_ENTRY)
+ printk("MAX_MPC_ENTRY exceeded!\n");
+ else
+ translation_table[mpc_record] = m; /* stash this for later */
+ if (m->trans_quad+1 > numnodes)
+ numnodes = m->trans_quad+1;
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
+ unsigned short oemsize)
+{
+ int count = sizeof (*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+
+ printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+ if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+ {
+ printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ oemtable->oem_signature[0],
+ oemtable->oem_signature[1],
+ oemtable->oem_signature[2],
+ oemtable->oem_signature[3]);
+ return;
+ }
+ if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+ {
+ printk("SMP oem mptable: checksum error!\n");
+ return;
+ }
+ while (count < oemtable->oem_length) {
+ switch (*oemptr) {
+ case MP_TRANSLATION:
+ {
+ struct mpc_config_translation *m=
+ (struct mpc_config_translation *)oemptr;
+ MP_translation_info(m);
+ oemptr += sizeof(*m);
+ count += sizeof(*m);
+ ++mpc_record;
+ break;
+ }
+ default:
+ {
+ printk("Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+ return;
+ }
+ }
+ }
+}
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+ char oem[16], prod[14];
+ int count=sizeof(*mpc);
+ unsigned char *mpt=((unsigned char *)mpc)+count;
+ int num_bus = 0;
+ int num_irq = 0;
+ unsigned char *bus_data;
+
+ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+ panic("SMP mptable: bad signature [%c%c%c%c]!\n",
+ mpc->mpc_signature[0],
+ mpc->mpc_signature[1],
+ mpc->mpc_signature[2],
+ mpc->mpc_signature[3]);
+ return 0;
+ }
+ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+ panic("SMP mptable: checksum error!\n");
+ return 0;
+ }
+ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+ mpc->mpc_spec);
+ return 0;
+ }
+ if (!mpc->mpc_lapic) {
+ printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+ return 0;
+ }
+ memcpy(oem,mpc->mpc_oem,8);
+ oem[8]=0;
+ printk("OEM ID: %s ",oem);
+
+ memcpy(prod,mpc->mpc_productid,12);
+ prod[12]=0;
+ printk("Product ID: %s ",prod);
+
+ detect_clustered_apic(oem, prod);
+
+ printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+
+ /* save the local APIC address, it might be non-default,
+ * but only if we're not using the ACPI tables
+ */
+ if (!have_acpi_tables)
+ mp_lapic_addr = mpc->mpc_lapic;
+
+ if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) {
+ /* We need to process the oem mpc tables to tell us which quad things are in ... */
+ mpc_record = 0;
+ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, mpc->mpc_oemsize);
+ mpc_record = 0;
+ }
+
+ /* Pre-scan to determine the number of bus and
+ * interrupt source records we have
+ */
+ while (count < mpc->mpc_length) {
+ switch (*mpt) {
+ case MP_PROCESSOR:
+ mpt += sizeof(struct mpc_config_processor);
+ count += sizeof(struct mpc_config_processor);
+ break;
+ case MP_BUS:
+ ++num_bus;
+ mpt += sizeof(struct mpc_config_bus);
+ count += sizeof(struct mpc_config_bus);
+ break;
+ case MP_INTSRC:
+ ++num_irq;
+ mpt += sizeof(struct mpc_config_intsrc);
+ count += sizeof(struct mpc_config_intsrc);
+ break;
+ case MP_IOAPIC:
+ mpt += sizeof(struct mpc_config_ioapic);
+ count += sizeof(struct mpc_config_ioapic);
+ break;
+ case MP_LINTSRC:
+ mpt += sizeof(struct mpc_config_lintsrc);
+ count += sizeof(struct mpc_config_lintsrc);
+ break;
+ default:
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ /*
+ * Paranoia: Allocate one extra of both the number of busses and number
+ * of irqs, and make sure that we have at least 4 interrupts per PCI
+ * slot. But some machines do not report very many busses, so we need
+ * to fall back on the older defaults.
+ */
+ ++num_bus;
+ max_mp_busses = max(num_bus, MAX_MP_BUSSES);
+ if (num_irq < (4 * max_mp_busses))
+ num_irq = 4 * num_bus; /* 4 intr/PCI slot */
+ ++num_irq;
+ max_irq_sources = max(num_irq, MAX_IRQ_SOURCES);
+
+ count = (max_mp_busses * sizeof(int)) * 4;
+ count += (max_irq_sources * sizeof(struct mpc_config_intsrc));
+
+ {
+ /* bus_data = alloc_bootmem(count); XXX Xen */
+ static char arr[4096];
+ if (count > 4096) BUG();
+ bus_data = (void *)arr;
+ }
+ if (!bus_data) {
+ printk(KERN_ERR "SMP mptable: out of memory!\n");
+ return 0;
+ }
+ mp_bus_id_to_type = (int *)&bus_data[0];
+ mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))];
+ mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2];
+ mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3];
+ mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4];
+ memset(mp_bus_id_to_pci_bus, -1, max_mp_busses * sizeof(int)); /* fill all int entries with -1 */
+
+ /*
+ * Now process the configuration blocks.
+ */
+ count = sizeof(*mpc);
+ mpt = ((unsigned char *)mpc)+count;
+ while (count < mpc->mpc_length) {
+ switch(*mpt) {
+ case MP_PROCESSOR:
+ {
+ struct mpc_config_processor *m=
+ (struct mpc_config_processor *)mpt;
+
+ /* ACPI may already have provided this one for us */
+ if (!have_acpi_tables)
+ MP_processor_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_BUS:
+ {
+ struct mpc_config_bus *m=
+ (struct mpc_config_bus *)mpt;
+ MP_bus_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_IOAPIC:
+ {
+ struct mpc_config_ioapic *m=
+ (struct mpc_config_ioapic *)mpt;
+ MP_ioapic_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_INTSRC:
+ {
+ struct mpc_config_intsrc *m=
+ (struct mpc_config_intsrc *)mpt;
+
+ MP_intsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_LINTSRC:
+ {
+ struct mpc_config_lintsrc *m=
+ (struct mpc_config_lintsrc *)mpt;
+ MP_lintsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ default:
+ {
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ ++mpc_record;
+ }
+
+ if (clustered_apic_mode) {
+ phys_cpu_present_map = logical_cpu_present_map;
+ }
+
+ printk("Enabling APIC mode: ");
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ printk("Clustered Logical. ");
+ else if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
+ printk("Physical. ");
+ else
+ printk("Flat. ");
+ printk("Using %d I/O APICs\n", nr_ioapics);
+
+ if (!num_processors)
+ printk(KERN_ERR "SMP mptable: no processors registered!\n");
+ return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+ unsigned int port;
+
+ port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+}
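+
+/*
+ * Worked example: the ELCR is two I/O ports, 0x4d0 (IRQs 0-7) and 0x4d1
+ * (IRQs 8-15), with one bit per IRQ (1 = level triggered). For IRQ 10:
+ * port = 0x4d0 + (10 >> 3) = 0x4d1, bit = 10 & 7 = 2.
+ */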
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+ struct mpc_config_intsrc intsrc;
+ int i;
+ int ELCR_fallback = 0;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* conforming */
+ intsrc.mpc_srcbus = 0;
+ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+ intsrc.mpc_irqtype = mp_INT;
+
+ /*
+ * If true, we have an ISA/PCI system with no IRQ entries
+ * in the MP table. To prevent the PCI interrupts from being set up
+ * incorrectly, we try to use the ELCR. The sanity check to see if
+ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+ * never be level sensitive, so we simply see if the ELCR agrees.
+ * If it does, we assume it's valid.
+ */
+ if (mpc_default_type == 5) {
+ printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+ printk("ELCR contains invalid data... not using ELCR\n");
+ else {
+ printk("Using ELCR to identify PCI interrupts\n");
+ ELCR_fallback = 1;
+ }
+ }
+
+ for (i = 0; i < 16; i++) {
+ switch (mpc_default_type) {
+ case 2:
+ if (i == 0 || i == 13)
+ continue; /* IRQ0 & IRQ13 not connected */
+ /* fall through */
+ default:
+ if (i == 2)
+ continue; /* IRQ2 is never connected */
+ }
+
+ if (ELCR_fallback) {
+ /*
+ * If the ELCR indicates a level-sensitive interrupt, we
+ * copy that information over to the MP table in the
+ * irqflag field: 13 (binary 1101) encodes trigger mode
+ * 11 (level sensitive) in bits 3:2 and polarity 01
+ * (active high) in bits 1:0.
+ */
+ if (ELCR_trigger(i))
+ intsrc.mpc_irqflag = 13;
+ else
+ intsrc.mpc_irqflag = 0;
+ }
+
+ intsrc.mpc_srcbusirq = i;
+ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+ MP_intsrc_info(&intsrc);
+ }
+
+ intsrc.mpc_irqtype = mp_ExtINT;
+ intsrc.mpc_srcbusirq = 0;
+ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+ MP_intsrc_info(&intsrc);
+}
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+ struct mpc_config_processor processor;
+ struct mpc_config_bus bus;
+ struct mpc_config_ioapic ioapic;
+ struct mpc_config_lintsrc lintsrc;
+ int linttypes[2] = { mp_ExtINT, mp_NMI };
+ int i;
+
+ /*
+ * local APIC has default address
+ */
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ /*
+ * 2 CPUs, numbered 0 & 1.
+ */
+ processor.mpc_type = MP_PROCESSOR;
+ /* Either an integrated APIC or a discrete 82489DX. */
+ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ processor.mpc_cpuflag = CPU_ENABLED;
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) |
+ boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+ for (i = 0; i < 2; i++) {
+ processor.mpc_apicid = i;
+ MP_processor_info(&processor);
+ }
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ switch (mpc_default_type) {
+ default:
+ printk("???\nUnknown standard configuration %d\n",
+ mpc_default_type);
+ /* fall through */
+ case 1:
+ case 5:
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ break;
+ case 2:
+ case 6:
+ case 3:
+ memcpy(bus.mpc_bustype, "EISA ", 6);
+ break;
+ case 4:
+ case 7:
+ memcpy(bus.mpc_bustype, "MCA ", 6);
+ }
+ MP_bus_info(&bus);
+ if (mpc_default_type > 4) {
+ bus.mpc_busid = 1;
+ memcpy(bus.mpc_bustype, "PCI ", 6);
+ MP_bus_info(&bus);
+ }
+
+ ioapic.mpc_type = MP_IOAPIC;
+ ioapic.mpc_apicid = 2;
+ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ ioapic.mpc_flags = MPC_APIC_USABLE;
+ ioapic.mpc_apicaddr = 0xFEC00000;
+ MP_ioapic_info(&ioapic);
+
+ /*
+ * We set up most of the low 16 IO-APIC pins according to MPS rules.
+ */
+ construct_default_ioirq_mptable(mpc_default_type);
+
+ lintsrc.mpc_type = MP_LINTSRC;
+ lintsrc.mpc_irqflag = 0; /* conforming */
+ lintsrc.mpc_srcbusid = 0;
+ lintsrc.mpc_srcbusirq = 0;
+ lintsrc.mpc_destapic = MP_APIC_ALL;
+ for (i = 0; i < 2; i++) {
+ lintsrc.mpc_irqtype = linttypes[i];
+ lintsrc.mpc_destapiclint = i;
+ MP_lintsrc_info(&lintsrc);
+ }
+}
+
+static struct intel_mp_floating *mpf_found;
+extern void config_acpi_tables(void);
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+ struct intel_mp_floating *mpf = mpf_found;
+
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Check if the ACPI tables are provided. Use them only to get
+ * the processor information, mainly because it provides
+ * the info on the logical processor(s), rather than the physical
+ * processor(s) that are provided by the MPS. We attempt to
+ * check only if the user provided a commandline override
+ */
+ config_acpi_tables();
+#endif
+
+ printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+ if (mpf->mpf_feature2 & (1<<7)) {
+ printk(" IMCR and PIC compatibility mode.\n");
+ pic_mode = 1;
+ } else {
+ printk(" Virtual Wire compatibility mode.\n");
+ pic_mode = 0;
+ }
+
+ /*
+ * Now see if we need to read further.
+ */
+ if (mpf->mpf_feature1 != 0) {
+
+ printk("Default MP configuration #%d\n", mpf->mpf_feature1);
+ construct_default_ISA_mptable(mpf->mpf_feature1);
+
+ } else if (mpf->mpf_physptr) {
+
+ /*
+ * Read the physical hardware table. Anything here will
+ * override the defaults.
+ */
+ if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+ smp_found_config = 0;
+ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+ return;
+ }
+ /*
+ * If there are no explicit MP IRQ entries, then we are
+ * broken. We set up most of the low 16 IO-APIC pins to
+ * ISA defaults and hope it will work.
+ */
+ if (!mp_irq_entries) {
+ struct mpc_config_bus bus;
+
+ printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ MP_bus_info(&bus);
+
+ construct_default_ioirq_mptable(0);
+ }
+
+ } else
+ BUG();
+
+ printk("Processors: %d\n", num_processors);
+ /*
+ * Only use the first configuration found.
+ */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+ unsigned long *bp = phys_to_virt(base);
+ struct intel_mp_floating *mpf;
+
+ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+ if (sizeof(*mpf) != 16)
+ printk("Error: MPF size\n");
+
+ while (length > 0) {
+ mpf = (struct intel_mp_floating *)bp;
+ if ((*bp == SMP_MAGIC_IDENT) &&
+ (mpf->mpf_length == 1) &&
+ !mpf_checksum((unsigned char *)bp, 16) &&
+ ((mpf->mpf_specification == 1)
+ || (mpf->mpf_specification == 4)) ) {
+
+ smp_found_config = 1;
+ printk("found SMP MP-table at %08lx\n",
+ virt_to_phys(mpf));
+ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+ if (mpf->mpf_physptr)
+ reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE);
+ mpf_found = mpf;
+ return 1;
+ }
+ bp += 4;
+ length -= 16;
+ }
+ return 0;
+}
+
+void __init find_intel_smp (void)
+{
+ unsigned int address;
+
+ /*
+ * FIXME: Linux assumes you have 640K of base ram..
+ * this continues the error...
+ *
+ * 1) Scan the bottom 1K for a signature
+ * 2) Scan the top 1K of base RAM
+ * 3) Scan the 64K of bios
+ */
+ if (smp_scan_config(0x0,0x400) ||
+ smp_scan_config(639*0x400,0x400) ||
+ smp_scan_config(0xF0000,0x10000))
+ return;
+ /*
+ * If it is an SMP machine we should know now, unless the
+ * configuration is in an EISA/MCA bus machine with an
+ * extended BIOS data area.
+ *
+ * There is a real-mode segmented pointer to the 4K EBDA area
+ * at 0x40E; calculate its address and scan the area here.
+ *
+ * NOTE! There were Linux loaders that would corrupt the EBDA
+ * area, and as such this kind of SMP config may be less
+ * trustworthy, simply because the SMP table may have been
+ * stomped on during early boot. Thankfully the bootloaders
+ * now honour the EBDA.
+ */
+
+ address = *(unsigned short *)phys_to_virt(0x40E);
+ address <<= 4;
+ smp_scan_config(address, 0x1000);
+}
+
+#else
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+void __init find_visws_smp(void)
+{
+ smp_found_config = 1;
+
+ phys_cpu_present_map |= 2; /* or in id 1 */
+ apic_version[1] |= 0x10; /* integrated APIC */
+ apic_version[0] |= 0x10;
+
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+}
+
+#endif
+
+/*
+ * - Intel MP Configuration Table
+ * - or SGI Visual Workstation configuration
+ */
+void __init find_smp_config (void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ find_intel_smp();
+#endif
+#ifdef CONFIG_VISWS
+ find_visws_smp();
+#endif
+}
+
diff --git a/xen/arch/i386/pci-dma.c b/xen/arch/i386/pci-dma.c
new file mode 100644
index 0000000000..9d19cea867
--- /dev/null
+++ b/xen/arch/i386/pci-dma.c
@@ -0,0 +1,37 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * On i386 there is no hardware dynamic DMA address translation,
+ * so consistent alloc/free are merely page allocation/freeing.
+ * The rest of the dynamic DMA mapping interface is implemented
+ * in asm/pci.h.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/lib.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+
+void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+ dma_addr_t *dma_handle)
+{
+ void *ret;
+ int gfp = GFP_ATOMIC;
+
+ if (hwdev == NULL || ((u32)hwdev->dma_mask < 0xffffffff))
+ gfp |= GFP_DMA;
+ ret = (void *)__get_free_pages(gfp, get_order(size));
+
+ if (ret != NULL) {
+ memset(ret, 0, size);
+ *dma_handle = virt_to_bus(ret);
+ }
+ return ret;
+}
+
+void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ free_pages((unsigned long)vaddr, get_order(size));
+}
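+
+/*
+ * Illustrative usage sketch (not part of this file): a driver allocates
+ * and frees a buffer shared with its device like this; "mydev" and the
+ * size are hypothetical.
+ *
+ *   dma_addr_t bus_addr;
+ *   void *cpu_addr = pci_alloc_consistent(mydev, 4096, &bus_addr);
+ *   ... program bus_addr into the device, access cpu_addr from the CPU ...
+ *   pci_free_consistent(mydev, 4096, cpu_addr, bus_addr);
+ */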
diff --git a/xen/arch/i386/pci-i386.c b/xen/arch/i386/pci-i386.c
new file mode 100644
index 0000000000..7a213d824c
--- /dev/null
+++ b/xen/arch/i386/pci-i386.c
@@ -0,0 +1,391 @@
+/*
+ * Low-Level PCI Access for i386 machines
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * Drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * Drew's work was sponsored by:
+ * iX Multiuser Multitasking Magazine
+ * Hannover, Germany
+ * hm@ix.de
+ *
+ * Copyright 1997--2000 Martin Mares <mj@ucw.cz>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ *
+ *
+ * CHANGELOG :
+ * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
+ * Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
+ *
+ * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic
+ * Potter, potter@cao-vlsi.ibp.fr
+ *
+ * Jan 10, 1995 : Modified to store the information about configured pci
+ * devices into a list, which can be accessed via /proc/pci by
+ * Curtis Varner, cvarner@cs.ucr.edu
+ *
+ * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter.
+ * Alpha version. Intel & UMC chipset support only.
+ *
+ * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code
+ * moved to drivers/pci/pci.c.
+ *
+ * Dec 7, 1996 : Added support for direct configuration access of boards
+ * with Intel compatible access schemes (tsbogend@alpha.franken.de)
+ *
+ * Feb 3, 1997 : Set internal functions to static, save/restore flags
+ * avoid dead locks reading broken PCI BIOS, werner@suse.de
+ *
+ * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS
+ * (mj@atrey.karlin.mff.cuni.cz)
+ *
+ * May 7, 1997 : Added some missing cli()'s. [mj]
+ *
+ * Jun 20, 1997 : Corrected problems in "conf1" type accesses.
+ * (paubert@iram.es)
+ *
+ * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts
+ * and cleaned it up... Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj]
+ *
+ * May 1, 1998 : Support for peer host bridges. [mj]
+ *
+ * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space
+ * can be accessed from interrupts even on SMP systems. [mj]
+ *
+ * August 1998 : Better support for peer host bridges and more paranoid
+ * checks for direct hardware access. Ugh, this file starts to look as
+ * a large gallery of common hardware bug workarounds (watch the comments)
+ * -- the PCI specs themselves are sane, but most implementors should be
+ * hit hard with \hammer scaled \magstep5. [mj]
+ *
+ * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj]
+ *
+ * Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj]
+ *
+ * August 1999 : New resource management and configuration access stuff. [mj]
+ *
+ * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges.
+ * Based on ideas by Chris Frantz and David Hinds. [mj]
+ *
+ * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi
+ * for a lot of patience during testing. [mj]
+ *
+ * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj]
+ */
+
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/errno.h>
+
+#include "pci-i386.h"
+
+void
+pcibios_update_resource(struct pci_dev *dev, struct resource *root,
+ struct resource *res, int resource)
+{
+ u32 new, check;
+ int reg;
+
+ new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
+ if (resource < 6) {
+ reg = PCI_BASE_ADDRESS_0 + 4*resource;
+ } else if (resource == PCI_ROM_RESOURCE) {
+ res->flags |= PCI_ROM_ADDRESS_ENABLE;
+ new |= PCI_ROM_ADDRESS_ENABLE;
+ reg = dev->rom_base_reg;
+ } else {
+ /* Somebody might have asked for allocation of a non-standard resource */
+ return;
+ }
+
+ pci_write_config_dword(dev, reg, new);
+ pci_read_config_dword(dev, reg, &check);
+ if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
+ printk(KERN_ERR "PCI: Error while updating region "
+ "%s/%d (%08x != %08x)\n", dev->slot_name, resource,
+ new, check);
+ }
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might be mirrored at 0x0100-0x03ff.
+ */
+void
+pcibios_align_resource(void *data, struct resource *res,
+ unsigned long size, unsigned long align)
+{
+ if (res->flags & IORESOURCE_IO) {
+ unsigned long start = res->start;
+
+ if (start & 0x300) {
+ start = (start + 0x3ff) & ~0x3ff;
+ res->start = start;
+ }
+ }
+}
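+
+/*
+ * Worked example: a request starting at 0x2900 has bits in 0x300 set, so
+ * it is rounded up to the next 0x400 boundary, 0x2c00 - back in the
+ * 0x000-0x0ff region modulo 0x400, where 10-bit decoders cannot alias it
+ * onto motherboard/ISA ports.
+ */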
+
+
+/*
+ * Handle resources of PCI devices. If the world were perfect, we could
+ * just allocate all the resource regions and do nothing more. It isn't.
+ * On the other hand, we cannot just re-allocate all devices, as it would
+ * require us to know lots of host bridge internals. So we attempt to
+ * keep as much of the original configuration as possible, but tweak it
+ * when it's found to be wrong.
+ *
+ * Known BIOS problems we have to work around:
+ * - I/O or memory regions not configured
+ * - regions configured, but not enabled in the command register
+ * - bogus I/O addresses above 64K used
+ * - expansion ROMs left enabled (this may sound harmless, but given
+ * the fact the PCI specs explicitly allow address decoders to be
+ * shared between expansion ROMs and other resource regions, it's
+ * at least dangerous)
+ *
+ * Our solution:
+ * (1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ * This gives us fixed barriers on where we can allocate.
+ * (2) Allocate resources for all enabled devices. If there is
+ * a collision, just mark the resource as unallocated. Also
+ * disable expansion ROMs during this step.
+ * (3) Try to allocate resources for disabled devices. If the
+ * resources were assigned correctly, everything goes well,
+ * if they weren't, they won't disturb allocation of other
+ * resources.
+ * (4) Assign new addresses to resources which were either
+ * not configured at all or misconfigured. If explicitly
+ * requested by the user, configure expansion ROM address
+ * as well.
+ */
+
+static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
+{
+ struct list_head *ln;
+ struct pci_bus *bus;
+ struct pci_dev *dev;
+ int idx;
+ struct resource *r, *pr;
+
+ /* Depth-First Search on bus tree */
+ for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
+ bus = pci_bus_b(ln);
+ if ((dev = bus->self)) {
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+ r = &dev->resource[idx];
+ if (!r->start)
+ continue;
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || request_resource(pr, r) < 0)
+ printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name);
+ }
+ }
+ pcibios_allocate_bus_resources(&bus->children);
+ }
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+ struct pci_dev *dev;
+ int idx, disabled;
+ u16 command;
+ struct resource *r, *pr;
+
+ pci_for_each_dev(dev) {
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ for(idx = 0; idx < 6; idx++) {
+ r = &dev->resource[idx];
+ if (r->parent) /* Already allocated */
+ continue;
+ if (!r->start) /* Address not assigned at all */
+ continue;
+ if (r->flags & IORESOURCE_IO)
+ disabled = !(command & PCI_COMMAND_IO);
+ else
+ disabled = !(command & PCI_COMMAND_MEMORY);
+ if (pass == disabled) {
+ DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
+ r->start, r->end, r->flags, disabled, pass);
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || request_resource(pr, r) < 0) {
+ printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name);
+ /* We'll assign a new address later */
+ r->end -= r->start;
+ r->start = 0;
+ }
+ }
+ }
+ if (!pass) {
+ r = &dev->resource[PCI_ROM_RESOURCE];
+ if (r->flags & PCI_ROM_ADDRESS_ENABLE) {
+ /* Turn the ROM off, leave the resource region, but keep it unregistered. */
+ u32 reg;
+ DBG("PCI: Switching off ROM of %s\n", dev->slot_name);
+ r->flags &= ~PCI_ROM_ADDRESS_ENABLE;
+ pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+ pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
+ }
+ }
+ }
+}
+
+static void __init pcibios_assign_resources(void)
+{
+ struct pci_dev *dev;
+ int idx;
+ struct resource *r;
+
+ pci_for_each_dev(dev) {
+ int class = dev->class >> 8;
+
+ /* Don't touch classless devices and host bridges */
+ if (!class || class == PCI_CLASS_BRIDGE_HOST)
+ continue;
+
+ for(idx=0; idx<6; idx++) {
+ r = &dev->resource[idx];
+
+ /*
+ * Don't touch IDE controllers and I/O ports of video cards!
+ */
+ if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
+ (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
+ continue;
+
+ /*
+ * We shall assign a new address to this resource, either because
+ * the BIOS forgot to do so or because we have decided the old
+ * address was unusable for some reason.
+ */
+ if (!r->start && r->end)
+ pci_assign_resource(dev, idx);
+ }
+
+ if (pci_probe & PCI_ASSIGN_ROMS) {
+ r = &dev->resource[PCI_ROM_RESOURCE];
+ r->end -= r->start;
+ r->start = 0;
+ if (r->end)
+ pci_assign_resource(dev, PCI_ROM_RESOURCE);
+ }
+ }
+}
+
+void __init pcibios_resource_survey(void)
+{
+ DBG("PCI: Allocating resources\n");
+ pcibios_allocate_bus_resources(&pci_root_buses);
+ pcibios_allocate_resources(0);
+ pcibios_allocate_resources(1);
+ pcibios_assign_resources();
+}
+
+int pcibios_enable_resources(struct pci_dev *dev, int mask)
+{
+ u16 cmd, old_cmd;
+ int idx;
+ struct resource *r;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ old_cmd = cmd;
+ for(idx=0; idx<6; idx++) {
+ /* Only set up the requested stuff */
+ if (!(mask & (1<<idx)))
+ continue;
+
+ r = &dev->resource[idx];
+ if (!r->start && r->end) {
+ printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name);
+ return -EINVAL;
+ }
+ if (r->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (r->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+ if (dev->resource[PCI_ROM_RESOURCE].start)
+ cmd |= PCI_COMMAND_MEMORY;
+ if (cmd != old_cmd) {
+ printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd);
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ return 0;
+}
+
+/*
+ * If we set up a device for bus mastering, we need to check the latency
+ * timer as certain crappy BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+ u8 lat;
+ pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+ if (lat < 16)
+ lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+ else if (lat > pcibios_max_latency)
+ lat = pcibios_max_latency;
+ else
+ return;
+ printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat);
+ pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+#if 0
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine)
+{
+ unsigned long prot;
+
+ /* I/O space cannot be accessed via normal processor loads and
+ * stores on this platform.
+ */
+ if (mmap_state == pci_mmap_io)
+ return -EINVAL;
+
+ /* Leave vm_pgoff as-is, the PCI space address is the physical
+ * address on this platform.
+ */
+ vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
+
+ prot = pgprot_val(vma->vm_page_prot);
+ if (boot_cpu_data.x86 > 3)
+ prot |= _PAGE_PCD | _PAGE_PWT;
+ vma->vm_page_prot = __pgprot(prot);
+
+ /* Write-combine setting is ignored, it is changed via the mtrr
+ * interfaces on this platform.
+ */
+ if (remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+#endif
diff --git a/xen/arch/i386/pci-i386.h b/xen/arch/i386/pci-i386.h
new file mode 100644
index 0000000000..2d051c51b2
--- /dev/null
+++ b/xen/arch/i386/pci-i386.h
@@ -0,0 +1,69 @@
+/*
+ * Low-Level PCI Access for i386 machines.
+ *
+ * (c) 1999 Martin Mares <mj@ucw.cz>
+ */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define PCI_PROBE_BIOS 0x0001
+#define PCI_PROBE_CONF1 0x0002
+#define PCI_PROBE_CONF2 0x0004
+#define PCI_NO_SORT 0x0100
+#define PCI_BIOS_SORT 0x0200
+#define PCI_NO_CHECKS 0x0400
+#define PCI_ASSIGN_ROMS 0x1000
+#define PCI_BIOS_IRQ_SCAN 0x2000
+#define PCI_ASSIGN_ALL_BUSSES 0x4000
+
+extern unsigned int pci_probe;
+
+/* pci-i386.c */
+
+extern unsigned int pcibios_max_latency;
+
+void pcibios_resource_survey(void);
+int pcibios_enable_resources(struct pci_dev *, int);
+
+/* pci-pc.c */
+
+extern int pcibios_last_bus;
+extern struct pci_bus *pci_root_bus;
+extern struct pci_ops *pci_root_ops;
+
+/* pci-irq.c */
+
+struct irq_info {
+ u8 bus, devfn; /* Bus, device and function */
+ struct {
+ u8 link; /* IRQ line ID, chipset dependent, 0=not routed */
+ u16 bitmap; /* Available IRQs */
+ } __attribute__((packed)) irq[4];
+ u8 slot; /* Slot number, 0=onboard */
+ u8 rfu;
+} __attribute__((packed));
+
+struct irq_routing_table {
+ u32 signature; /* PIRQ_SIGNATURE should be here */
+ u16 version; /* PIRQ_VERSION */
+ u16 size; /* Table size in bytes */
+ u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */
+ u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */
+ u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */
+ u32 miniport_data; /* Crap */
+ u8 rfu[11];
+ u8 checksum; /* Modulo 256 checksum must give zero */
+ struct irq_info slots[0];
+} __attribute__((packed));
+
+extern unsigned int pcibios_irq_mask;
+
+void pcibios_irq_init(void);
+void pcibios_fixup_irqs(void);
+void pcibios_enable_irq(struct pci_dev *dev);
diff --git a/xen/arch/i386/pci-irq.c b/xen/arch/i386/pci-irq.c
new file mode 100644
index 0000000000..b7a212b014
--- /dev/null
+++ b/xen/arch/i386/pci-irq.c
@@ -0,0 +1,795 @@
+/*
+ * Low-Level PCI Support for PC -- Routing of Interrupts
+ *
+ * (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/io_apic.h>
+
+#include "pci-i386.h"
+
+#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
+#define PIRQ_VERSION 0x0100
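+/* PIRQ_SIGNATURE is the dword "$PIR" in little-endian byte order, i.e. 0x52495024. */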
+
+int broken_hp_bios_irq9;
+
+static struct irq_routing_table *pirq_table;
+
+/*
+ * Never use: 0, 1, 2 (timer, keyboard, and cascade)
+ * Avoid using: 13, 14 and 15 (FP error and IDE).
+ * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse)
+ */
+unsigned int pcibios_irq_mask = 0xfff8;
+
+static int pirq_penalty[16] = {
+ 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000,
+ 0, 0, 0, 0, 1000, 100000, 100000, 100000
+};
+
+struct irq_router {
+ char *name;
+ u16 vendor, device;
+ int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
+ int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+};
+
+/*
+ * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
+ */
+
+static struct irq_routing_table * __init pirq_find_routing_table(void)
+{
+ u8 *addr;
+ struct irq_routing_table *rt;
+ int i;
+ u8 sum;
+
+ for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
+ rt = (struct irq_routing_table *) addr;
+ if (rt->signature != PIRQ_SIGNATURE ||
+ rt->version != PIRQ_VERSION ||
+ rt->size % 16 ||
+ rt->size < sizeof(struct irq_routing_table))
+ continue;
+ sum = 0;
+ for(i=0; i<rt->size; i++)
+ sum += addr[i];
+ if (!sum) {
+ DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+ return rt;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * If we have a IRQ routing table, use it to search for peer host
+ * bridges. It's a gross hack, but since there are no other known
+ * ways how to get a list of buses, we have to go this way.
+ */
+
+static void __init pirq_peer_trick(void)
+{
+ struct irq_routing_table *rt = pirq_table;
+ u8 busmap[256];
+ int i;
+ struct irq_info *e;
+
+ memset(busmap, 0, sizeof(busmap));
+ for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
+ e = &rt->slots[i];
+#ifdef DEBUG
+ {
+ int j;
+ DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+ for(j=0; j<4; j++)
+ DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
+ DBG("\n");
+ }
+#endif
+ busmap[e->bus] = 1;
+ }
+ for(i=1; i<256; i++)
+ /*
+ * It might be a secondary bus, but in this case its parent is already
+ * known (ascending bus order) and therefore pci_scan_bus returns immediately.
+ */
+ if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL))
+ printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
+ pcibios_last_bus = -1;
+}
+
+/*
+ * Code for querying and setting of IRQ routes on various interrupt routers.
+ */
+
+static void eisa_set_level_irq(unsigned int irq)
+{
+ unsigned char mask = 1 << (irq & 7);
+ unsigned int port = 0x4d0 + (irq >> 3);
+ unsigned char val = inb(port);
+
+ if (!(val & mask)) {
+ DBG(" -> edge");
+ outb(val | mask, port);
+ }
+}
+
+/*
+ * Common IRQ routing practice: nybbles in config space,
+ * offset by some magic constant.
+ */
+static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
+{
+ u8 x;
+ unsigned reg = offset + (nr >> 1);
+
+ pci_read_config_byte(router, reg, &x);
+ return (nr & 1) ? (x >> 4) : (x & 0xf);
+}
+
+static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+{
+ u8 x;
+ unsigned reg = offset + (nr >> 1);
+
+ pci_read_config_byte(router, reg, &x);
+ x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
+ pci_write_config_byte(router, reg, x);
+}
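+
+/*
+ * Worked example: read_config_nybble(router, 0x55, 3) reads config byte
+ * 0x55 + (3 >> 1) = 0x56 and, 3 being odd, returns its high nibble;
+ * even-numbered entries live in the low nibble of their byte.
+ */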
+
+/*
+ * ALI pirq entries are damn ugly, and completely undocumented.
+ * This has been figured out from pirq tables, and it's not a pretty
+ * picture.
+ */
+static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+
+ return irqmap[read_config_nybble(router, 0x48, pirq-1)];
+}
+
+static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+ unsigned int val = irqmap[irq];
+
+ if (val) {
+ write_config_nybble(router, 0x48, pirq-1, val);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
+ * just a pointer to the config space.
+ */
+static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 x;
+
+ pci_read_config_byte(router, pirq, &x);
+ return (x < 16) ? x : 0;
+}
+
+static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ pci_write_config_byte(router, pirq, irq);
+ return 1;
+}
+
+/*
+ * The VIA pirq rules are nibble-based, like ALI,
+ * but without the ugly irq number munging.
+ */
+static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0x55, pirq);
+}
+
+static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0x55, pirq, irq);
+ return 1;
+}
+
+/*
+ * ITE 8330G pirq rules are nibble-based
+ * FIXME: pirqmap may be { 1, 0, 3, 2 },
+ * 2+3 are both mapped to irq 9 on my system
+ */
+static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ return read_config_nybble(router,0x43, pirqmap[pirq-1]);
+}
+
+static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
+ return 1;
+}
+
+/*
+ * OPTI: high four bits are nibble pointer..
+ * I wonder what the low bits do?
+ */
+static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0xb8, pirq >> 4);
+}
+
+static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0xb8, pirq >> 4, irq);
+ return 1;
+}
+
+/*
+ * Cyrix: nibble offset 0x5C
+ */
+static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0x5C, (pirq-1)^1);
+}
+
+static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0x5C, (pirq-1)^1, irq);
+ return 1;
+}
+
+/*
+ * PIRQ routing for SiS 85C503 router used in several SiS chipsets
+ * According to the SiS 5595 datasheet (preliminary V1.0, 12/24/1997)
+ * the related registers work as follows:
+ *
+ * general: one byte per re-routable IRQ,
+ * bit 7 IRQ mapping enabled (0) or disabled (1)
+ * bits [6:4] reserved
+ * bits [3:0] IRQ to map to
+ * allowed: 3-7, 9-12, 14-15
+ * reserved: 0, 1, 2, 8, 13
+ *
+ * individual registers in device config space:
+ *
+ * 0x41/0x42/0x43/0x44: PCI INT A/B/C/D - bits as in general case
+ *
+ * 0x61: IDEIRQ: bits as in general case - but:
+ * bits [6:5] must be written 01
+ * bit 4 channel-select primary (0), secondary (1)
+ *
+ * 0x62: USBIRQ: bits as in general case - but:
+ * bit 4 OHCI function disabled (0), enabled (1)
+ *
+ * 0x6a: ACPI/SCI IRQ - bits as in general case
+ *
+ * 0x7e: Data Acq. Module IRQ - bits as in general case
+ *
+ * Apparently there are systems implementing the PCI routing table using
+ * both link values 0x01-0x04 and 0x41-0x44 for PCI INTA..D, yet using
+ * register offsets like 0x62 as link values for e.g. USBIRQ, so there
+ * is no simple "register = offset + pirq" relation.
+ * Currently we support PCI INTA..D and USBIRQ and try our best to handle
+ * both link mappings.
+ * IDE/ACPI/DAQ mapping is currently unsupported (left untouched as set by BIOS).
+ */
+
+static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 x;
+ int reg = pirq;
+
+ switch(pirq) {
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x04:
+ reg += 0x40;
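+ /* fall through */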
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x62:
+ pci_read_config_byte(router, reg, &x);
+ if (reg != 0x62)
+ break;
+ if (!(x & 0x40))
+ return 0;
+ break;
+ case 0x61:
+ case 0x6a:
+ case 0x7e:
+ printk(KERN_INFO "SiS pirq: advanced IDE/ACPI/DAQ mapping not yet implemented\n");
+ return 0;
+ default:
+ printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ return (x & 0x80) ? 0 : (x & 0x0f);
+}
+
+static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ u8 x;
+ int reg = pirq;
+
+ switch(pirq) {
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x04:
+ reg += 0x40;
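+ /* fall through */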
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x62:
+ x = (irq&0x0f) ? (irq&0x0f) : 0x80;
+ if (reg != 0x62)
+ break;
+ /* always mark OHCI enabled, as nothing else knows about this */
+ x |= 0x40;
+ break;
+ case 0x61:
+ case 0x6a:
+ case 0x7e:
+ printk(KERN_INFO "advanced SiS pirq mapping not yet implemented\n");
+ return 0;
+ default:
+ printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ pci_write_config_byte(router, reg, x);
+
+ return 1;
+}
+
+/*
+ * VLSI: nibble offset 0x74 - educated guess due to routing table and
+ * config space of VLSI 82C534 PCI-bridge/router (1004:0102)
+ * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
+ * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
+ * for the busbridge to the docking station.
+ */
+
+static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ if (pirq > 8) {
+ printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ return read_config_nybble(router, 0x74, pirq-1);
+}
+
+static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ if (pirq > 8) {
+ printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ write_config_nybble(router, 0x74, pirq-1, irq);
+ return 1;
+}
+
+/*
+ * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
+ * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register
+ * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect
+ * register is a straight binary coding of desired PIC IRQ (low nibble).
+ *
+ * The 'link' value in the PIRQ table is already in the correct format
+ * for the Index register. There are some special index values:
+ * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
+ * and 0x03 for SMBus.
+ */
+static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ outb_p(pirq, 0xc00);
+ return inb(0xc01) & 0xf;
+}
+
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ outb_p(pirq, 0xc00);
+ outb_p(irq, 0xc01);
+ return 1;
+}
+
+/* Support for AMD756 PCI IRQ Routing
+ * Jhon H. Caicedo <jhcaiced@osso.org.co>
+ * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
+ * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
+ * The AMD756 pirq rules are nibble-based:
+ *	offset 0x56: bits 0-3 PIRQA, bits 4-7 PIRQB
+ *	offset 0x57: bits 0-3 PIRQC, bits 4-7 PIRQD
+ */
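+/*
+ * Example of the nibble layout above: PIRQA is the low nibble of 0x56
+ * and PIRQD the high nibble of 0x57, so read_config_nybble(router,
+ * 0x56, pirq - 1) covers all four link values 1..4.
+ */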
+static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 irq;
+ irq = 0;
+ if (pirq <= 4)
+ {
+ irq = read_config_nybble(router, 0x56, pirq - 1);
+ }
+ printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
+ dev->vendor, dev->device, pirq, irq);
+ return irq;
+}
+
+static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
+ dev->vendor, dev->device, pirq, irq);
+ if (pirq <= 4)
+ {
+ write_config_nybble(router, 0x56, pirq - 1, irq);
+ }
+ return 1;
+}
+
+#ifdef CONFIG_PCI_BIOS
+
+static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ struct pci_dev *bridge;
+ int pin = pci_get_interrupt_pin(dev, &bridge);
+ return pcibios_set_irq_routing(bridge, pin, irq);
+}
+
+static struct irq_router pirq_bios_router =
+ { "BIOS", 0, 0, NULL, pirq_bios_set };
+
+#endif
+
+static struct irq_router pirq_routers[] = {
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371MX, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_10, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801E_0, pirq_piix_get, pirq_piix_set },
+
+ { "ALI", PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, pirq_ali_get, pirq_ali_set },
+
+ { "ITE", PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_IT8330G_0, pirq_ite_get, pirq_ite_set },
+
+ { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, pirq_via_get, pirq_via_set },
+ { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, pirq_via_get, pirq_via_set },
+ { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, pirq_via_get, pirq_via_set },
+
+ { "OPTI", PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C700, pirq_opti_get, pirq_opti_set },
+
+ { "NatSemi", PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, pirq_cyrix_get, pirq_cyrix_set },
+ { "SIS", PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, pirq_sis_get, pirq_sis_set },
+ { "VLSI 82C534", PCI_VENDOR_ID_VLSI, PCI_DEVICE_ID_VLSI_82C534, pirq_vlsi_get, pirq_vlsi_set },
+ { "ServerWorks", PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4,
+ pirq_serverworks_get, pirq_serverworks_set },
+ { "ServerWorks", PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5,
+ pirq_serverworks_get, pirq_serverworks_set },
+ { "AMD756 VIPER", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_740B,
+ pirq_amd756_get, pirq_amd756_set },
+ { "AMD766", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7413,
+ pirq_amd756_get, pirq_amd756_set },
+ { "AMD768", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7443,
+ pirq_amd756_get, pirq_amd756_set },
+
+ { "default", 0, 0, NULL, NULL }
+};
+
+static struct irq_router *pirq_router;
+static struct pci_dev *pirq_router_dev;
+
+static void __init pirq_find_router(void)
+{
+ struct irq_routing_table *rt = pirq_table;
+ struct irq_router *r;
+
+#ifdef CONFIG_PCI_BIOS
+ if (!rt->signature) {
+ printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
+ pirq_router = &pirq_bios_router;
+ return;
+ }
+#endif
+
+ DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
+ rt->rtr_vendor, rt->rtr_device);
+
+ /* fall back to default router if nothing else found */
+ pirq_router = &pirq_routers[ARRAY_SIZE(pirq_routers) - 1];
+
+ pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
+ if (!pirq_router_dev) {
+ DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
+ return;
+ }
+
+ for(r=pirq_routers; r->vendor; r++) {
+ /* Exact match against router table entry? Use it! */
+ if (r->vendor == rt->rtr_vendor && r->device == rt->rtr_device) {
+ pirq_router = r;
+ break;
+ }
+ /* Match against router device entry? Use it as a fallback */
+ if (r->vendor == pirq_router_dev->vendor && r->device == pirq_router_dev->device) {
+ pirq_router = r;
+ }
+ }
+ printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
+ pirq_router->name,
+ pirq_router_dev->vendor,
+ pirq_router_dev->device,
+ pirq_router_dev->slot_name);
+}
+
+static struct irq_info *pirq_get_info(struct pci_dev *dev)
+{
+ struct irq_routing_table *rt = pirq_table;
+ int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+ struct irq_info *info;
+
+ for (info = rt->slots; entries--; info++)
+ if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+ return info;
+ return NULL;
+}
+
+static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
+{
+}
+
+static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
+{
+ u8 pin;
+ struct irq_info *info;
+ int i, pirq, newirq;
+ int irq = 0;
+ u32 mask;
+ struct irq_router *r = pirq_router;
+ struct pci_dev *dev2;
+ char *msg = NULL;
+
+ if (!pirq_table)
+ return 0;
+
+ /* Find IRQ routing entry */
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (!pin) {
+ DBG(" -> no interrupt pin\n");
+ return 0;
+ }
+ pin = pin - 1;
+
+ DBG("IRQ for %s:%d", dev->slot_name, pin);
+ info = pirq_get_info(dev);
+ if (!info) {
+ DBG(" -> not found in routing table\n");
+ return 0;
+ }
+ pirq = info->irq[pin].link;
+ mask = info->irq[pin].bitmap;
+ if (!pirq) {
+ DBG(" -> not routed\n");
+ return 0;
+ }
+ DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+ mask &= pcibios_irq_mask;
+
+ /* Work around broken HP Pavilion Notebooks which assign USB to
+ IRQ 9 even though it is actually wired to IRQ 11 */
+
+ if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) {
+ dev->irq = 11;
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11);
+ r->set(pirq_router_dev, dev, pirq, 11);
+ }
+
+ /*
+ * Find the best IRQ to assign: use the one
+ * reported by the device if possible.
+ */
+ newirq = dev->irq;
+ if (!newirq && assign) {
+ for (i = 0; i < 16; i++) {
+ if (!(mask & (1 << i)))
+ continue;
+ if (pirq_penalty[i] < pirq_penalty[newirq] &&
+ !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) {
+ free_irq(i, dev);
+ newirq = i;
+ }
+ }
+ }
+ DBG(" -> newirq=%d", newirq);
+
+ /* Check if it is hardcoded */
+ if ((pirq & 0xf0) == 0xf0) {
+ irq = pirq & 0xf;
+ DBG(" -> hardcoded IRQ %d\n", irq);
+ msg = "Hardcoded";
+ } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) {
+ DBG(" -> got IRQ %d\n", irq);
+ msg = "Found";
+ } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+ DBG(" -> assigning IRQ %d", newirq);
+ if (r->set(pirq_router_dev, dev, pirq, newirq)) {
+ eisa_set_level_irq(newirq);
+ DBG(" ... OK\n");
+ msg = "Assigned";
+ irq = newirq;
+ }
+ }
+
+ if (!irq) {
+ DBG(" ... failed\n");
+ if (newirq && mask == (1 << newirq)) {
+ msg = "Guessed";
+ irq = newirq;
+ } else
+ return 0;
+ }
+ printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name);
+
+ /* Update IRQ for all devices with the same pirq value */
+ pci_for_each_dev(dev2) {
+ pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
+ if (!pin)
+ continue;
+ pin--;
+ info = pirq_get_info(dev2);
+ if (!info)
+ continue;
+ if (info->irq[pin].link == pirq) {
+ /* We refuse to override the dev->irq information. Give a warning! */
+ if (dev2->irq && dev2->irq != irq) {
+ printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
+ dev2->slot_name, dev2->irq, irq);
+ continue;
+ }
+ dev2->irq = irq;
+ pirq_penalty[irq]++;
+ if (dev != dev2)
+ printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name);
+ }
+ }
+ return 1;
+}
+
+void __init pcibios_irq_init(void)
+{
+ DBG("PCI: IRQ init\n");
+ pirq_table = pirq_find_routing_table();
+#ifdef CONFIG_PCI_BIOS
+ if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
+ pirq_table = pcibios_get_irq_routing_table();
+#endif
+ if (pirq_table) {
+ pirq_peer_trick();
+ pirq_find_router();
+ if (pirq_table->exclusive_irqs) {
+ int i;
+ for (i=0; i<16; i++)
+ if (!(pirq_table->exclusive_irqs & (1 << i)))
+ pirq_penalty[i] += 100;
+ }
+ /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+ if (io_apic_assign_pci_irqs)
+ pirq_table = NULL;
+ }
+}
+
+void __init pcibios_fixup_irqs(void)
+{
+ struct pci_dev *dev;
+ u8 pin;
+
+ DBG("PCI: IRQ fixup\n");
+ pci_for_each_dev(dev) {
+ /*
+ * If the BIOS has set an out of range IRQ number, just ignore it.
+ * Also keep track of which IRQ's are already in use.
+ */
+ if (dev->irq >= 16) {
+ DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq);
+ dev->irq = 0;
+ }
+ /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
+ if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+ pirq_penalty[dev->irq] = 0;
+ pirq_penalty[dev->irq]++;
+ }
+
+ pci_for_each_dev(dev) {
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Recalculate IRQ numbers if we use the I/O APIC.
+ */
+ if (io_apic_assign_pci_irqs)
+ {
+ int irq;
+
+ if (pin) {
+ pin--; /* interrupt pins are numbered starting from 1 */
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+ /*
+ * Busses behind bridges are typically not listed in the MP-table.
+ * In this case we have to look up the IRQ based on the parent bus,
+ * parent slot, and pin number. The SMP code detects such bridged
+ * busses itself so we should get into this branch reliably.
+ */
+ if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+ struct pci_dev * bridge = dev->bus->self;
+
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
+ PCI_SLOT(bridge->devfn), pin);
+ if (irq >= 0)
+ printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n",
+ bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq);
+ }
+ if (irq >= 0) {
+ printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n",
+ dev->bus->number, PCI_SLOT(dev->devfn), pin, irq);
+ dev->irq = irq;
+ }
+ }
+ }
+#endif
+ /*
+ * Still no IRQ? Try to lookup one...
+ */
+ if (pin && !dev->irq)
+ pcibios_lookup_irq(dev, 0);
+ }
+}
+
+void pcibios_penalize_isa_irq(int irq)
+{
+ /*
+ * If any ISAPnP device reports an IRQ in its list of possible
+ * IRQ's, we try to avoid assigning it to PCI devices.
+ */
+ pirq_penalty[irq] += 100;
+}
+
+void pcibios_enable_irq(struct pci_dev *dev)
+{
+ u8 pin;
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+ char *msg;
+ if (io_apic_assign_pci_irqs)
+ msg = " Probably buggy MP table.";
+ else if (pci_probe & PCI_BIOS_IRQ_SCAN)
+ msg = "";
+ else
+ msg = " Please try using pci=biosirq.";
+ printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+ 'A' + pin - 1, dev->slot_name, msg);
+ }
+}
diff --git a/xen/arch/i386/pci-pc.c b/xen/arch/i386/pci-pc.c
new file mode 100644
index 0000000000..d63a54a79c
--- /dev/null
+++ b/xen/arch/i386/pci-pc.c
@@ -0,0 +1,1494 @@
+/*
+ * Low-Level PCI Support for PC
+ *
+ * (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+
+/*#include <asm/segment.h>*/
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/smpboot.h>
+
+#include "pci-i386.h"
+
+extern int numnodes;
+#define __KERNEL_CS __HYPERVISOR_CS
+#define __KERNEL_DS __HYPERVISOR_DS
+
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+
+int pcibios_last_bus = -1;
+struct pci_bus *pci_root_bus = NULL;
+struct pci_ops *pci_root_ops = NULL;
+
+int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL;
+int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL;
+
+#ifdef CONFIG_MULTIQUAD
+#define BUS2QUAD(global) (mp_bus_id_to_node[global])
+#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
+#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
+#else
+#define BUS2QUAD(global) (0)
+#define BUS2LOCAL(global) (global)
+#define QUADLOCAL2BUS(quad,local) (local)
+#endif
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+static spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*
+ * Functions for accessing PCI configuration space with type 1 accesses
+ */
+
+#ifdef CONFIG_PCI_DIRECT
+
+#ifdef CONFIG_MULTIQUAD
+#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
+ (0x80000000 | (BUS2LOCAL(bus) << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
+
+static int pci_conf1_mq_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
+
+ switch (len) {
+ case 1:
+ *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
+ break;
+ case 2:
+ *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
+ break;
+ case 4:
+ *value = inl_quad(0xCFC, BUS2QUAD(bus));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf1_mq_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
+
+ switch (len) {
+ case 1:
+ outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
+ break;
+ case 2:
+ outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
+ break;
+ case 4:
+ outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf1_read_mq_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+
+ *value = (u8)data;
+
+ return result;
+}
+
+static int pci_conf1_read_mq_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+
+ *value = (u16)data;
+
+ return result;
+}
+
+static int pci_conf1_read_mq_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ if (!value)
+ return -EINVAL;
+
+ return pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf1_write_mq_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf1_write_mq_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf1_write_mq_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_mq_conf1 = {
+ pci_conf1_read_mq_config_byte,
+ pci_conf1_read_mq_config_word,
+ pci_conf1_read_mq_config_dword,
+ pci_conf1_write_mq_config_byte,
+ pci_conf1_write_mq_config_word,
+ pci_conf1_write_mq_config_dword
+};
+
+#endif /* CONFIG_MULTIQUAD */
+#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
+ (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
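+/*
+ * Worked example: bus 0, dev 3, fn 1, reg 0x3c encodes as
+ * 0x80000000 | (0 << 16) | (3 << 11) | (1 << 8) | 0x3c = 0x8000193c;
+ * this is written to the address port 0xCF8 and the data is then
+ * transferred through 0xCFC (offset by reg & 3 for sub-dword access).
+ */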
+
+static int pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* !CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
+
+ switch (len) {
+ case 1:
+ *value = inb(0xCFC + (reg & 3));
+ break;
+ case 2:
+ *value = inw(0xCFC + (reg & 2));
+ break;
+ case 4:
+ *value = inl(0xCFC);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* !CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+	if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
+
+ switch (len) {
+ case 1:
+ outb((u8)value, 0xCFC + (reg & 3));
+ break;
+ case 2:
+ outw((u16)value, 0xCFC + (reg & 2));
+ break;
+ case 4:
+ outl((u32)value, 0xCFC);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+#undef PCI_CONF1_ADDRESS
+
+static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+
+ *value = (u8)data;
+
+ return result;
+}
+
+static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+
+ *value = (u16)data;
+
+ return result;
+}
+
+static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ return pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_conf1 = {
+ pci_conf1_read_config_byte,
+ pci_conf1_read_config_word,
+ pci_conf1_read_config_dword,
+ pci_conf1_write_config_byte,
+ pci_conf1_write_config_word,
+ pci_conf1_write_config_dword
+};
+
+
+/*
+ * Functions for accessing PCI configuration space with type 2 accesses
+ */
+
+#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg)
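+/*
+ * Worked example: dev 5, reg 0x10 maps to I/O port 0xC000 | (5 << 8) |
+ * 0x10 = 0xC510, valid only after the function number and bus have been
+ * latched through ports 0xCF8 and 0xCFA below.
+ */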
+
+static int pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ if (dev & 0x10)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+ outb((u8)bus, 0xCFA);
+
+ switch (len) {
+ case 1:
+ *value = inb(PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 2:
+ *value = inw(PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 4:
+ *value = inl(PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ }
+
+ outb (0, 0xCF8);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
+{
+ unsigned long flags;
+
+	if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ if (dev & 0x10)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+ outb((u8)bus, 0xCFA);
+
+ switch (len) {
+ case 1:
+ outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 2:
+ outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 4:
+ outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ }
+
+ outb (0, 0xCF8);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+#undef PCI_CONF2_ADDRESS
+
+static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+ result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+ *value = (u8)data;
+ return result;
+}
+
+static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+ result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+ *value = (u16)data;
+ return result;
+}
+
+static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ return pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_conf2 = {
+ pci_conf2_read_config_byte,
+ pci_conf2_read_config_word,
+ pci_conf2_read_config_dword,
+ pci_conf2_write_config_byte,
+ pci_conf2_write_config_word,
+ pci_conf2_write_config_dword
+};
+
+
+/*
+ * Before we decide to use direct hardware access mechanisms, we try to do some
+ * trivial checks to ensure it at least _seems_ to be working -- we just test
+ * whether bus 00 contains a host bridge (this is similar to checking
+ * techniques used in XFree86, but ours should be more reliable since we
+ * attempt to make use of direct access hints provided by the PCI BIOS).
+ *
+ * This should be close to trivial, but it isn't, because there are buggy
+ * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
+ */
+static int __devinit pci_sanity_check(struct pci_ops *o)
+{
+ u16 x;
+ struct pci_bus bus; /* Fake bus and device */
+ struct pci_dev dev;
+
+ if (pci_probe & PCI_NO_CHECKS)
+ return 1;
+ bus.number = 0;
+ dev.bus = &bus;
+ for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++)
+ if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) &&
+ (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) ||
+ (!o->read_word(&dev, PCI_VENDOR_ID, &x) &&
+ (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)))
+ return 1;
+ DBG("PCI: Sanity check failed\n");
+ return 0;
+}
+
+static struct pci_ops * __devinit pci_check_direct(void)
+{
+ unsigned int tmp;
+ unsigned long flags;
+
+ __save_flags(flags); __cli();
+
+ /*
+ * Check if configuration type 1 works.
+ */
+ if (pci_probe & PCI_PROBE_CONF1) {
+ outb (0x01, 0xCFB);
+ tmp = inl (0xCF8);
+ outl (0x80000000, 0xCF8);
+ if (inl (0xCF8) == 0x80000000 &&
+ pci_sanity_check(&pci_direct_conf1)) {
+ outl (tmp, 0xCF8);
+ __restore_flags(flags);
+ printk(KERN_INFO "PCI: Using configuration type 1\n");
+ request_region(0xCF8, 8, "PCI conf1");
+
+#ifdef CONFIG_MULTIQUAD
+ /* Multi-Quad has an extended PCI Conf1 */
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ return &pci_direct_mq_conf1;
+#endif
+ return &pci_direct_conf1;
+ }
+ outl (tmp, 0xCF8);
+ }
+
+ /*
+ * Check if configuration type 2 works.
+ */
+ if (pci_probe & PCI_PROBE_CONF2) {
+ outb (0x00, 0xCFB);
+ outb (0x00, 0xCF8);
+ outb (0x00, 0xCFA);
+ if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 &&
+ pci_sanity_check(&pci_direct_conf2)) {
+ __restore_flags(flags);
+ printk(KERN_INFO "PCI: Using configuration type 2\n");
+ request_region(0xCF8, 4, "PCI conf2");
+ return &pci_direct_conf2;
+ }
+ }
+
+ __restore_flags(flags);
+ return NULL;
+}
+
+#endif
+
+/*
+ * BIOS32 and PCI BIOS handling.
+ */
+
+#ifdef CONFIG_PCI_BIOS
+
+#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX
+#define PCIBIOS_PCI_BIOS_PRESENT 0xb101
+#define PCIBIOS_FIND_PCI_DEVICE 0xb102
+#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103
+#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106
+#define PCIBIOS_READ_CONFIG_BYTE 0xb108
+#define PCIBIOS_READ_CONFIG_WORD 0xb109
+#define PCIBIOS_READ_CONFIG_DWORD 0xb10a
+#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b
+#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c
+#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d
+#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e
+#define PCIBIOS_SET_PCI_HW_INT 0xb10f
+
+/* BIOS32 signature: "_32_" */
+#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
+
+/* PCI signature: "PCI " */
+#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
+
+/* PCI service signature: "$PCI" */
+#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
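+/* These packings follow in-memory (little-endian) byte order; e.g.
+   BIOS32_SIGNATURE evaluates to 0x5f32335f, "_32_" read as a dword. */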
+
+/* PCI BIOS hardware mechanism flags */
+#define PCIBIOS_HW_TYPE1 0x01
+#define PCIBIOS_HW_TYPE2 0x02
+#define PCIBIOS_HW_TYPE1_SPEC 0x10
+#define PCIBIOS_HW_TYPE2_SPEC 0x20
+
+/*
+ * This is the standard structure used to identify the entry point
+ * to the BIOS32 Service Directory, as documented in
+ * Standard BIOS 32-bit Service Directory Proposal
+ * Revision 0.4 May 24, 1993
+ * Phoenix Technologies Ltd.
+ * Norwood, MA
+ * and the PCI BIOS specification.
+ */
+
+union bios32 {
+ struct {
+ unsigned long signature; /* _32_ */
+ unsigned long entry; /* 32 bit physical address */
+ unsigned char revision; /* Revision level, 0 */
+ unsigned char length; /* Length in paragraphs should be 01 */
+ unsigned char checksum; /* All bytes must add up to zero */
+ unsigned char reserved[5]; /* Must be zero */
+ } fields;
+ char chars[16];
+};
+
+/*
+ * Physical address of the service directory. I don't know if we're
+ * allowed to have more than one of these or not, so just in case
+ * we'll make pcibios_present() take a memory start parameter and store
+ * the array there.
+ */
+
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} bios32_indirect = { 0, __KERNEL_CS };
+
+/*
+ * Returns the entry point for the given service, NULL on error
+ */
+
+static unsigned long bios32_service(unsigned long service)
+{
+ unsigned char return_code; /* %al */
+ unsigned long address; /* %ebx */
+ unsigned long length; /* %ecx */
+ unsigned long entry; /* %edx */
+ unsigned long flags;
+
+ __save_flags(flags); __cli();
+ __asm__("lcall *(%%edi); cld"
+ : "=a" (return_code),
+ "=b" (address),
+ "=c" (length),
+ "=d" (entry)
+ : "0" (service),
+ "1" (0),
+ "D" (&bios32_indirect));
+ __restore_flags(flags);
+
+ switch (return_code) {
+ case 0:
+ return address + entry;
+ case 0x80: /* Not present */
+ printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service);
+ return 0;
+ default: /* Shouldn't happen */
+ printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
+ service, return_code);
+ return 0;
+ }
+}
+
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} pci_indirect = { 0, __KERNEL_CS };
+
+static int pci_bios_present;
+
+static int __devinit check_pcibios(void)
+{
+ u32 signature, eax, ebx, ecx;
+ u8 status, major_ver, minor_ver, hw_mech;
+ unsigned long flags, pcibios_entry;
+
+ if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
+ pci_indirect.address = pcibios_entry + PAGE_OFFSET;
+
+ __save_flags(flags); __cli();
+ __asm__(
+ "lcall *(%%edi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=d" (signature),
+ "=a" (eax),
+ "=b" (ebx),
+ "=c" (ecx)
+ : "1" (PCIBIOS_PCI_BIOS_PRESENT),
+ "D" (&pci_indirect)
+ : "memory");
+ __restore_flags(flags);
+
+ status = (eax >> 8) & 0xff;
+ hw_mech = eax & 0xff;
+ major_ver = (ebx >> 8) & 0xff;
+ minor_ver = ebx & 0xff;
+ if (pcibios_last_bus < 0)
+ pcibios_last_bus = ecx & 0xff;
+ DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n",
+ status, hw_mech, major_ver, minor_ver, pcibios_last_bus);
+ if (status || signature != PCI_SIGNATURE) {
+ printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n",
+ status, signature);
+ return 0;
+ }
+ printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n",
+ major_ver, minor_ver, pcibios_entry, pcibios_last_bus);
+#ifdef CONFIG_PCI_DIRECT
+ if (!(hw_mech & PCIBIOS_HW_TYPE1))
+ pci_probe &= ~PCI_PROBE_CONF1;
+ if (!(hw_mech & PCIBIOS_HW_TYPE2))
+ pci_probe &= ~PCI_PROBE_CONF2;
+#endif
+ return 1;
+ }
+ return 0;
+}
+
+static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id,
+ unsigned short index, unsigned char *bus, unsigned char *device_fn)
+{
+ unsigned short bx;
+ unsigned short ret;
+
+ __asm__("lcall *(%%edi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=b" (bx),
+ "=a" (ret)
+ : "1" (PCIBIOS_FIND_PCI_DEVICE),
+ "c" (device_id),
+ "d" (vendor),
+ "S" ((int) index),
+ "D" (&pci_indirect));
+ *bus = (bx >> 8) & 0xff;
+ *device_fn = bx & 0xff;
+ return (int) (ret & 0xff00) >> 8;
+}
+
+static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
+{
+ unsigned long result = 0;
+ unsigned long flags;
+ unsigned long bx = ((bus << 8) | (dev << 3) | fn);
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ switch (len) {
+ case 1:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (PCIBIOS_READ_CONFIG_BYTE),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 2:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (PCIBIOS_READ_CONFIG_WORD),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 4:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (PCIBIOS_READ_CONFIG_DWORD),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return (int)((result & 0xff00) >> 8);
+}
+
+static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
+{
+ unsigned long result = 0;
+ unsigned long flags;
+ unsigned long bx = ((bus << 8) | (dev << 3) | fn);
+
+	if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ switch (len) {
+ case 1:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 2:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (PCIBIOS_WRITE_CONFIG_WORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 4:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return (int)((result & 0xff00) >> 8);
+}
+
+static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+
+ if (!value)
+ BUG();
+
+ result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+
+ *value = (u8)data;
+
+ return result;
+}
+
+static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+
+ if (!value)
+ BUG();
+
+ result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+
+ *value = (u16)data;
+
+ return result;
+}
+
+static int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ if (!value)
+ BUG();
+
+ return pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+
+/*
+ * Function table for BIOS32 access
+ */
+
+static struct pci_ops pci_bios_access = {
+ pci_bios_read_config_byte,
+ pci_bios_read_config_word,
+ pci_bios_read_config_dword,
+ pci_bios_write_config_byte,
+ pci_bios_write_config_word,
+ pci_bios_write_config_dword
+};
+
+/*
+ * Try to find PCI BIOS.
+ */
+
+static struct pci_ops * __devinit pci_find_bios(void)
+{
+ union bios32 *check;
+ unsigned char sum;
+ int i, length;
+
+ /*
+ * Follow the standard procedure for locating the BIOS32 Service
+ * directory by scanning the permissible address range from
+ * 0xe0000 through 0xfffff for a valid BIOS32 structure.
+ */
+
+ for (check = (union bios32 *) __va(0xe0000);
+ check <= (union bios32 *) __va(0xffff0);
+ ++check) {
+ if (check->fields.signature != BIOS32_SIGNATURE)
+ continue;
+ length = check->fields.length * 16;
+ if (!length)
+ continue;
+ sum = 0;
+ for (i = 0; i < length ; ++i)
+ sum += check->chars[i];
+ if (sum != 0)
+ continue;
+ if (check->fields.revision != 0) {
+ printk("PCI: unsupported BIOS32 revision %d at 0x%p\n",
+ check->fields.revision, check);
+ continue;
+ }
+ DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
+ if (check->fields.entry >= 0x100000) {
+ printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
+ return NULL;
+ } else {
+ unsigned long bios32_entry = check->fields.entry;
+ DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+ bios32_indirect.address = bios32_entry + PAGE_OFFSET;
+ if (check_pcibios())
+ return &pci_bios_access;
+ }
+		break;	/* Hopefully there cannot be more than one BIOS32... */
+ }
+
+ return NULL;
+}
+
+/*
+ * Sort the device list according to PCI BIOS. Nasty hack, but since some
+ * fool forgot to define the `correct' device order in the PCI BIOS specs
+ * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels
+ * which used BIOS ordering, we are bound to do this...
+ */
+
+static void __devinit pcibios_sort(void)
+{
+ LIST_HEAD(sorted_devices);
+ struct list_head *ln;
+ struct pci_dev *dev, *d;
+ int idx, found;
+ unsigned char bus, devfn;
+
+ DBG("PCI: Sorting device list...\n");
+ while (!list_empty(&pci_devices)) {
+ ln = pci_devices.next;
+ dev = pci_dev_g(ln);
+ idx = found = 0;
+ while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) {
+ idx++;
+ for (ln=pci_devices.next; ln != &pci_devices; ln=ln->next) {
+ d = pci_dev_g(ln);
+ if (d->bus->number == bus && d->devfn == devfn) {
+ list_del(&d->global_list);
+ list_add_tail(&d->global_list, &sorted_devices);
+ if (d == dev)
+ found = 1;
+ break;
+ }
+ }
+ if (ln == &pci_devices) {
+ printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn);
+ /*
+ * We must not continue scanning as several buggy BIOSes
+ * return garbage after the last device. Grr.
+ */
+ break;
+ }
+ }
+ if (!found) {
+ printk(KERN_WARNING "PCI: Device %02x:%02x not found by BIOS\n",
+ dev->bus->number, dev->devfn);
+ list_del(&dev->global_list);
+ list_add_tail(&dev->global_list, &sorted_devices);
+ }
+ }
+ list_splice(&sorted_devices, &pci_devices);
+}
+
+/*
+ * BIOS Functions for IRQ Routing
+ */
+
+struct irq_routing_options {
+ u16 size;
+ struct irq_info *table;
+ u16 segment;
+} __attribute__((packed));
+
+struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
+{
+ struct irq_routing_options opt;
+ struct irq_routing_table *rt = NULL;
+ int ret, map;
+ unsigned long page;
+
+ if (!pci_bios_present)
+ return NULL;
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ return NULL;
+ opt.table = (struct irq_info *) page;
+ opt.size = PAGE_SIZE;
+ opt.segment = __KERNEL_DS;
+
+ DBG("PCI: Fetching IRQ routing table... ");
+ __asm__("push %%es\n\t"
+ "push %%ds\n\t"
+ "pop %%es\n\t"
+ "lcall *(%%esi); cld\n\t"
+ "pop %%es\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret),
+ "=b" (map)
+ : "0" (PCIBIOS_GET_ROUTING_OPTIONS),
+ "1" (0),
+ "D" ((long) &opt),
+ "S" (&pci_indirect));
+ DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map);
+ if (ret & 0xff00)
+ printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff);
+ else if (opt.size) {
+ rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL);
+ if (rt) {
+ memset(rt, 0, sizeof(struct irq_routing_table));
+ rt->size = opt.size + sizeof(struct irq_routing_table);
+ rt->exclusive_irqs = map;
+ memcpy(rt->slots, (void *) page, opt.size);
+ printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n");
+ }
+ }
+ free_page(page);
+ return rt;
+}
+
+
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
+{
+ int ret;
+
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_SET_PCI_HW_INT),
+ "b" ((dev->bus->number << 8) | dev->devfn),
+ "c" ((irq << 8) | (pin + 10)),
+ "S" (&pci_indirect));
+ return !(ret & 0xff00);
+}
+
+#endif
+
+/*
+ * Several buggy motherboards address only 16 devices and mirror
+ * them to the next 16 IDs. We try to detect this `feature' on all
+ * primary buses (those containing host bridges as they are
+ * expected to be unique) and remove the ghost devices.
+ */
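+/*
+ * Example: with mirror = PCI_DEVFN(16,0) = 0x80, a real device at
+ * devfn 0x08 would show a ghost at devfn 0x88 with identical IDs,
+ * class and resources.
+ */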
+
+static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
+{
+ struct list_head *ln, *mn;
+ struct pci_dev *d, *e;
+ int mirror = PCI_DEVFN(16,0);
+ int seen_host_bridge = 0;
+ int i;
+
+ DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
+ for (ln=b->devices.next; ln != &b->devices; ln=ln->next) {
+ d = pci_dev_b(ln);
+ if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+ seen_host_bridge++;
+ for (mn=ln->next; mn != &b->devices; mn=mn->next) {
+ e = pci_dev_b(mn);
+ if (e->devfn != d->devfn + mirror ||
+ e->vendor != d->vendor ||
+ e->device != d->device ||
+ e->class != d->class)
+ continue;
+ for(i=0; i<PCI_NUM_RESOURCES; i++)
+ if (e->resource[i].start != d->resource[i].start ||
+ e->resource[i].end != d->resource[i].end ||
+ e->resource[i].flags != d->resource[i].flags)
+ continue;
+ break;
+ }
+ if (mn == &b->devices)
+ return;
+ }
+ if (!seen_host_bridge)
+ return;
+ printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number);
+
+ ln = &b->devices;
+ while (ln->next != &b->devices) {
+ d = pci_dev_b(ln->next);
+ if (d->devfn >= mirror) {
+ list_del(&d->global_list);
+ list_del(&d->bus_list);
+ kfree(d);
+ } else
+ ln = ln->next;
+ }
+}
+
+/*
+ * Discover remaining PCI buses in case there are peer host bridges.
+ * We use the number of last PCI bus provided by the PCI BIOS.
+ */
+static void __devinit pcibios_fixup_peer_bridges(void)
+{
+ int n;
+ struct pci_bus bus;
+ struct pci_dev dev;
+ u16 l;
+
+ if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
+ return;
+ DBG("PCI: Peer bridge fixup\n");
+ for (n=0; n <= pcibios_last_bus; n++) {
+ if (pci_bus_exists(&pci_root_buses, n))
+ continue;
+ bus.number = n;
+ bus.ops = pci_root_ops;
+ dev.bus = &bus;
+ for(dev.devfn=0; dev.devfn<256; dev.devfn += 8)
+ if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) &&
+ l != 0x0000 && l != 0xffff) {
+ DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l);
+ printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
+ pci_scan_bus(n, pci_root_ops, NULL);
+ break;
+ }
+ }
+}
+
+/*
+ * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
+ */
+
+static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+{
+ /*
+ * i450NX -- Find and scan all secondary buses on all PXB's.
+ */
+ int pxb, reg;
+ u8 busno, suba, subb;
+#ifdef CONFIG_MULTIQUAD
+ int quad = BUS2QUAD(d->bus->number);
+#endif
+ printk("PCI: Searching for i450NX host bridges on %s\n", d->slot_name);
+ reg = 0xd0;
+ for(pxb=0; pxb<2; pxb++) {
+ pci_read_config_byte(d, reg++, &busno);
+ pci_read_config_byte(d, reg++, &suba);
+ pci_read_config_byte(d, reg++, &subb);
+ DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
+ if (busno)
+ pci_scan_bus(QUADLOCAL2BUS(quad,busno), pci_root_ops, NULL); /* Bus A */
+ if (suba < subb)
+ pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), pci_root_ops, NULL); /* Bus B */
+ }
+ pcibios_last_bus = -1;
+}
+
+static void __devinit pci_fixup_i450gx(struct pci_dev *d)
+{
+ /*
+ * i450GX and i450KX -- Find and scan all secondary buses.
+ * (called separately for each PCI bridge found)
+ */
+ u8 busno;
+ pci_read_config_byte(d, 0x4a, &busno);
+ printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", d->slot_name, busno);
+ pci_scan_bus(busno, pci_root_ops, NULL);
+ pcibios_last_bus = -1;
+}
+
+static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
+{
+ /*
+	 * UM8886BF IDE controller sets region type bits incorrectly,
+	 * therefore the regions look like memory despite being I/O.
+ */
+ int i;
+
+ printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", d->slot_name);
+ for(i=0; i<4; i++)
+ d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
+}
+
+static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
+{
+ /*
+ * NCR 53C810 returns class code 0 (at least on some systems).
+ * Fix class to be PCI_CLASS_STORAGE_SCSI
+ */
+ if (!d->class) {
+ printk("PCI: fixing NCR 53C810 class code for %s\n", d->slot_name);
+ d->class = PCI_CLASS_STORAGE_SCSI << 8;
+ }
+}
+
+static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
+{
+ int i;
+
+ /*
+ * PCI IDE controllers use non-standard I/O port decoding, respect it.
+ */
+ if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
+ return;
+ DBG("PCI: IDE base address fixup for %s\n", d->slot_name);
+ for(i=0; i<4; i++) {
+ struct resource *r = &d->resource[i];
+ if ((r->start & ~0x80) == 0x374) {
+ r->start |= 2;
+ r->end = r->start;
+ }
+ }
+}
+
+static void __devinit pci_fixup_ide_trash(struct pci_dev *d)
+{
+ int i;
+
+ /*
+	 * There exist PCI IDE controllers which have utter garbage
+	 * in the first four base registers. Ignore it.
+ */
+ DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name);
+ for(i=0; i<4; i++)
+ d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
+}
+
+static void __devinit pci_fixup_latency(struct pci_dev *d)
+{
+ /*
+ * SiS 5597 and 5598 chipsets require latency timer set to
+ * at most 32 to avoid lockups.
+ */
+ DBG("PCI: Setting max latency to 32\n");
+ pcibios_max_latency = 32;
+}
+
+static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
+{
+ /*
+ * PIIX4 ACPI device: hardwired IRQ9
+ */
+ d->irq = 9;
+}
+
+/*
+ * Addresses issues with problems in the memory write queue timer in
+ * certain VIA Northbridges. This bugfix is per VIA's specifications,
+ * except for the KL133/KM133: clearing bit 5 on those Northbridges seems
+ * to trigger a bug in its integrated ProSavage video card, which
+ * causes screen corruption. We only clear bits 6 and 7 for that chipset,
+ * until VIA can provide us with definitive information on why screen
+ * corruption occurs, and what exactly those bits do.
+ *
+ * VIA 8363,8622,8361 Northbridges:
+ * - bits 5, 6, 7 at offset 0x55 need to be turned off
+ * VIA 8367 (KT266x) Northbridges:
+ * - bits 5, 6, 7 at offset 0x95 need to be turned off
+ * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges:
+ * - bits 6, 7 at offset 0x55 need to be turned off
+ */
+
+#define VIA_8363_KL133_REVISION_ID 0x81
+#define VIA_8363_KM133_REVISION_ID 0x84
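+/* Example: with the default mask 0x1f, a register value of 0xe0 has
+   v & ~mask = 0xe0 set, so it is rewritten as 0xe0 & 0x1f = 0x00. */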
+
+static void __init pci_fixup_via_northbridge_bug(struct pci_dev *d)
+{
+ u8 v;
+ u8 revision;
+ int where = 0x55;
+ int mask = 0x1f; /* clear bits 5, 6, 7 by default */
+
+ pci_read_config_byte(d, PCI_REVISION_ID, &revision);
+
+ if (d->device == PCI_DEVICE_ID_VIA_8367_0) {
+		/* Fix PCI bus latency issues caused by a NB BIOS error:
+		   it appears the bug-free^Wreduced KT266x BIOS forces
+		   NB latency to zero. */
+ pci_write_config_byte(d, PCI_LATENCY_TIMER, 0);
+
+ where = 0x95; /* the memory write queue timer register is
+ different for the KT266x's: 0x95 not 0x55 */
+ } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
+ (revision == VIA_8363_KL133_REVISION_ID ||
+ revision == VIA_8363_KM133_REVISION_ID)) {
+ mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5
+ causes screen corruption on the KL133/KM133 */
+ }
+
+ pci_read_config_byte(d, where, &v);
+ if (v & ~mask) {
+ printk("Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
+ d->device, revision, where, v, mask, v & mask);
+ v &= mask;
+ pci_write_config_byte(d, where, v);
+ }
+}
+
+/*
+ * For some reason Intel decided that certain parts of their
+ * 815, 845 and some other chipsets must look like PCI-to-PCI bridges
+ * while they are obviously not. The 82801 family (AA, AB, BAM/CAM,
+ * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according
+ * to Intel terminology. These devices forward all addresses from the
+ * system to the PCI bus no matter what their window settings are, so
+ * they are "transparent" (or subtractive-decoding) from the programmer's
+ * point of view.
+ */
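+/*
+ * E.g. an 82801 hub-to-PCI bridge with device ID 0x244e satisfies
+ * (dev->device & 0xff00) == 0x2400 below and is marked transparent.
+ */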
+static void __init pci_fixup_transparent_bridge(struct pci_dev *dev)
+{
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
+ (dev->device & 0xff00) == 0x2400)
+ dev->transparent = 1;
+}
+
+struct pci_fixup pcibios_fixups[] = {
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash },
+ { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge },
+ { 0 }
+};
+
+/*
+ * Called after each bus is probed, but before its children
+ * are examined.
+ */
+
+void __devinit pcibios_fixup_bus(struct pci_bus *b)
+{
+ pcibios_fixup_ghosts(b);
+ pci_read_bridge_bases(b);
+}
+
+
+void __devinit pcibios_config_init(void)
+{
+ /*
+ * Try all known PCI access methods. Note that we support using
+ * both PCI BIOS and direct access, with a preference for direct.
+ */
+
+#ifdef CONFIG_PCI_DIRECT
+ struct pci_ops *tmp = NULL;
+#endif
+
+#ifdef CONFIG_PCI_BIOS
+ if ((pci_probe & PCI_PROBE_BIOS)
+ && ((pci_root_ops = pci_find_bios()))) {
+ pci_probe |= PCI_BIOS_SORT;
+ pci_bios_present = 1;
+ pci_config_read = pci_bios_read;
+ pci_config_write = pci_bios_write;
+ }
+#endif
+
+#ifdef CONFIG_PCI_DIRECT
+ if ((pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2))
+ && (tmp = pci_check_direct())) {
+ pci_root_ops = tmp;
+ if (pci_root_ops == &pci_direct_conf1) {
+ pci_config_read = pci_conf1_read;
+ pci_config_write = pci_conf1_write;
+ }
+ else {
+ pci_config_read = pci_conf2_read;
+ pci_config_write = pci_conf2_write;
+ }
+ }
+#endif
+
+ return;
+}
+
+void __init pcibios_init(void)
+{
+ int quad;
+
+ if (!pci_root_ops)
+ pcibios_config_init();
+ if (!pci_root_ops) {
+ printk(KERN_WARNING "PCI: System does not support PCI\n");
+ return;
+ }
+
+ printk(KERN_INFO "PCI: Probing PCI hardware\n");
+ pci_root_bus = pci_scan_bus(0, pci_root_ops, NULL);
+ if (clustered_apic_mode && (numnodes > 1)) {
+ for (quad = 1; quad < numnodes; ++quad) {
+ printk("Scanning PCI bus %d for quad %d\n",
+ QUADLOCAL2BUS(quad,0), quad);
+ pci_scan_bus(QUADLOCAL2BUS(quad,0),
+ pci_root_ops, NULL);
+ }
+ }
+
+ pcibios_irq_init();
+ pcibios_fixup_peer_bridges();
+ pcibios_fixup_irqs();
+ pcibios_resource_survey();
+
+#ifdef CONFIG_PCI_BIOS
+ if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
+ pcibios_sort();
+#endif
+}
+
+char * __devinit pcibios_setup(char *str)
+{
+ if (!strcmp(str, "off")) {
+ pci_probe = 0;
+ return NULL;
+ }
+#ifdef CONFIG_PCI_BIOS
+ else if (!strcmp(str, "bios")) {
+ pci_probe = PCI_PROBE_BIOS;
+ return NULL;
+ } else if (!strcmp(str, "nobios")) {
+ pci_probe &= ~PCI_PROBE_BIOS;
+ return NULL;
+ } else if (!strcmp(str, "nosort")) {
+ pci_probe |= PCI_NO_SORT;
+ return NULL;
+ } else if (!strcmp(str, "biosirq")) {
+ pci_probe |= PCI_BIOS_IRQ_SCAN;
+ return NULL;
+ }
+#endif
+#ifdef CONFIG_PCI_DIRECT
+ else if (!strcmp(str, "conf1")) {
+ pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
+ return NULL;
+ }
+ else if (!strcmp(str, "conf2")) {
+ pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
+ return NULL;
+ }
+#endif
+ else if (!strcmp(str, "rom")) {
+ pci_probe |= PCI_ASSIGN_ROMS;
+ return NULL;
+ } else if (!strcmp(str, "assign-busses")) {
+ pci_probe |= PCI_ASSIGN_ALL_BUSSES;
+ return NULL;
+ } else if (!strncmp(str, "irqmask=", 8)) {
+ pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
+ return NULL;
+ } else if (!strncmp(str, "lastbus=", 8)) {
+ pcibios_last_bus = simple_strtol(str+8, NULL, 0);
+ return NULL;
+ }
+ return str;
+}
+
+unsigned int pcibios_assign_all_busses(void)
+{
+ return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ int err;
+
+ if ((err = pcibios_enable_resources(dev, mask)) < 0)
+ return err;
+ pcibios_enable_irq(dev);
+ return 0;
+}
diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c
new file mode 100644
index 0000000000..3c048d72bf
--- /dev/null
+++ b/xen/arch/i386/process.c
@@ -0,0 +1,418 @@
+/*
+ * linux/arch/i386/kernel/process.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#define __KERNEL_SYSCALLS__
+#include <xeno/config.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/smp.h>
+#include <asm/ptrace.h>
+#include <xeno/delay.h>
+#include <xeno/interrupt.h>
+#include <asm/mc146818rtc.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+
+#include <xeno/irq.h>
+#include <xeno/event.h>
+
+asmlinkage void ret_from_newdomain(void) __asm__("ret_from_newdomain");
+
+int hlt_counter;
+
+void disable_hlt(void)
+{
+ hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+ hlt_counter--;
+}
+
+/*
+ * We use this if we don't have any better
+ * idle routine..
+ */
+static void default_idle(void)
+{
+ if (!hlt_counter) {
+ __cli();
+ if (!current->hyp_events && !softirq_pending(smp_processor_id()))
+ safe_halt();
+ else
+ __sti();
+ }
+}
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+ int cpu = smp_processor_id();
+
+ ASSERT(current->domain == IDLE_DOMAIN_ID);
+
+ current->has_cpu = 1;
+ (void)wake_up(current);
+ schedule();
+
+ /*
+     * Declare CPU setup done to the boot processor; the memory
+     * barrier ensures the state is visible before continuing.
+ */
+ smp_mb();
+ init_idle();
+
+ for ( ; ; )
+ {
+ while (!current->hyp_events && !softirq_pending(cpu))
+ default_idle();
+ do_hyp_events();
+ do_softirq();
+ }
+}
+
+static long no_idt[2];
+static int reboot_mode;
+int reboot_thru_bios = 0;
+
+#ifdef CONFIG_SMP
+int reboot_smp = 0;
+static int reboot_cpu = -1;
+/* shamelessly grabbed from lib/vsprintf.c for readability */
+#define is_digit(c) ((c) >= '0' && (c) <= '9')
+#endif
+
+
+static inline void kb_wait(void)
+{
+ int i;
+
+ for (i=0; i<0x10000; i++)
+ if ((inb_p(0x64) & 0x02) == 0)
+ break;
+}
+
+
+void machine_restart(char * __unused)
+{
+#ifdef CONFIG_SMP
+ int cpuid;
+
+ cpuid = GET_APIC_ID(apic_read(APIC_ID));
+
+ if (reboot_smp) {
+
+		/* check to see if reboot_cpu is valid;
+		   if it's not, default to the BSP */
+ if ((reboot_cpu == -1) ||
+ (reboot_cpu > (NR_CPUS -1)) ||
+ !(phys_cpu_present_map & (1<<cpuid)))
+ reboot_cpu = boot_cpu_physical_apicid;
+
+		reboot_smp = 0; /* use this as a flag so we only go through this once */
+		/* Re-run this function on the other CPUs: they will fall
+		   through this section since we have cleared reboot_smp,
+		   and do the reboot if they are the correct CPU;
+		   otherwise they halt. */
+ if (reboot_cpu != cpuid)
+ smp_call_function((void *)machine_restart , NULL, 1, 0);
+ }
+
+	/* if reboot_cpu is still -1, then we want a traditional reboot,
+	   and if we are not running on the reboot_cpu, halt */
+ if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
+ for (;;)
+ __asm__ __volatile__ ("hlt");
+ }
+ /*
+ * Stop all CPUs and turn off local APICs and the IO-APIC, so
+ * other OSs see a clean IRQ state.
+ */
+ smp_send_stop();
+ disable_IO_APIC();
+#endif
+
+ if(!reboot_thru_bios) {
+ /* rebooting needs to touch the page at absolute addr 0 */
+ *((unsigned short *)__va(0x472)) = reboot_mode;
+ for (;;) {
+ int i;
+ for (i=0; i<100; i++) {
+ kb_wait();
+ udelay(50);
+ outb(0xfe,0x64); /* pulse reset low */
+ udelay(50);
+ }
+ /* That didn't work - force a triple fault.. */
+ __asm__ __volatile__("lidt %0": :"m" (no_idt));
+ __asm__ __volatile__("int3");
+ }
+ }
+
+ panic("Need to reinclude BIOS reboot code\n");
+}
+
+void machine_halt(void)
+{
+ machine_restart(0);
+}
+
+void machine_power_off(void)
+{
+ machine_restart(0);
+}
+
+extern void show_trace(unsigned long* esp);
+
+void show_regs(struct pt_regs * regs)
+{
+ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+
+ printk("\n");
+ printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id());
+ if (regs->xcs & 3)
+ printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+ printk(" EFLAGS: %08lx\n",regs->eflags);
+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+ regs->eax,regs->ebx,regs->ecx,regs->edx);
+ printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+ regs->esi, regs->edi, regs->ebp);
+ printk(" DS: %04x ES: %04x\n",
+ 0xffff & regs->xds,0xffff & regs->xes);
+
+ __asm__("movl %%cr0, %0": "=r" (cr0));
+ __asm__("movl %%cr2, %0": "=r" (cr2));
+ __asm__("movl %%cr3, %0": "=r" (cr3));
+ /* This could fault if %cr4 does not exist */
+ __asm__("1: movl %%cr4, %0 \n"
+ "2: \n"
+ ".section __ex_table,\"a\" \n"
+ ".long 1b,2b \n"
+ ".previous \n"
+ : "=r" (cr4): "0" (0));
+ printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+ show_trace(&regs->esp);
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void release_segments(struct mm_struct *mm)
+{
+#if 0
+	void * ldt = mm->context.segments;
+
+ /*
+ * free the LDT
+ */
+ if (ldt) {
+		mm->context.segments = NULL;
+ clear_LDT();
+ vfree(ldt);
+ }
+#endif
+}
+
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+ /* nothing to do ... */
+}
+
+void flush_thread(void)
+{
+ struct task_struct *tsk = current;
+
+ memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+ /*
+ * Forget coprocessor state..
+ */
+ clear_fpu(tsk);
+ tsk->flags &= ~PF_DONEFPUINIT;
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+#if 0
+ if (dead_task->mm) {
+ void * ldt = dead_task->mm.context.segments;
+
+ // temporary debugging check
+ if (ldt) {
+ printk("WARNING: dead process %8s still has LDT? <%p>\n",
+ dead_task->comm, ldt);
+ BUG();
+ }
+ }
+#endif
+}
+
+/*
+ * We do not have to muck with descriptors here; that is
+ * done in switch_mm() as needed.
+ */
+void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
+{
+#if 0
+ struct mm_struct * old_mm;
+ void *old_ldt, *ldt;
+
+ ldt = NULL;
+ old_mm = current->mm;
+ if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
+ /*
+ * Completely new LDT, we initialize it from the parent:
+ */
+ ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+ if (!ldt)
+ printk(KERN_WARNING "ldt allocation failed\n");
+ else
+ memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ }
+ new_mm->context.segments = ldt;
+ new_mm->context.cpuvalid = ~0UL; /* valid on all CPUs - they can't have stale data */
+#endif
+}
+
+
+void new_thread(struct task_struct *p,
+ unsigned long start_pc,
+ unsigned long start_stack,
+ unsigned long start_info)
+{
+ struct pt_regs * regs;
+
+ regs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
+ memset(regs, 0, sizeof(*regs));
+
+ /*
+ * Initial register values:
+ * DS,ES,FS,GS = __GUEST_DS
+ * CS:EIP = __GUEST_CS:start_pc
+ * SS:ESP = __GUEST_DS:start_stack
+ * ESI = start_info
+ * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
+ */
+ p->thread.fs = p->thread.gs = __GUEST_DS;
+ regs->xds = regs->xes = regs->xss = __GUEST_DS;
+ regs->xcs = __GUEST_CS;
+ regs->eip = start_pc;
+ regs->esp = start_stack;
+ regs->esi = start_info;
+
+ p->thread.esp = (unsigned long) regs;
+ p->thread.esp0 = (unsigned long) (regs+1);
+
+ p->thread.eip = (unsigned long) ret_from_newdomain;
+
+ __save_flags(regs->eflags);
+ regs->eflags |= X86_EFLAGS_IF;
+
+ /* No fast trap at start of day. */
+ SET_DEFAULT_FAST_TRAP(&p->thread);
+}
+
+
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+ __asm__("movl %0,%%db" #register \
+ : /* no output */ \
+ :"r" (thread->debugreg[register]))
+
+/*
+ * switch_to(x,y) should switch tasks from x to y.
+ *
+ * We fsave/fwait so that an exception goes off at the right time
+ * (as a call from the fsave or fwait in effect) rather than to
+ * the wrong process. Lazy FP saving no longer makes any sense
+ * with modern CPUs, and this simplifies a lot of things (SMP
+ * and UP become the same).
+ *
+ * NOTE! We used to use the x86 hardware context switching. The
+ * reason for not using it any more becomes apparent when you
+ * try to recover gracefully from saved state that is no longer
+ * valid (stale segment register values in particular). With the
+ * hardware task-switch, there is no way to fix up bad state in
+ * a reasonable manner.
+ *
+ * The fact that Intel documents the hardware task-switching to
+ * be slow is largely a red herring - this code is not noticeably
+ * faster. However, there _is_ some room for improvement here,
+ * so the performance issues may eventually be a valid point.
+ * More important, however, is the fact that this allows us much
+ * more flexibility.
+ */
+/* NB. prev_p passed in %eax, next_p passed in %edx */
+void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ struct thread_struct *prev = &prev_p->thread,
+ *next = &next_p->thread;
+ struct tss_struct *tss = init_tss + smp_processor_id();
+
+ unlazy_fpu(prev_p);
+
+ /* Switch the fast-trap handler. */
+ CLEAR_FAST_TRAP(&prev_p->thread);
+ SET_FAST_TRAP(&next_p->thread);
+
+ tss->esp0 = next->esp0;
+ tss->esp1 = next->esp1;
+ tss->ss1 = next->ss1;
+
+ /*
+ * Save away %fs and %gs. No need to save %es and %ds, as
+ * those are always kernel segments while inside the kernel.
+ */
+ asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
+ asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
+
+ /* Switch GDT and LDT. */
+ __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
+ __load_LDT(next_p->mm.ldt_sel);
+
+ /*
+ * Restore %fs and %gs.
+ */
+ loadsegment(fs, next->fs);
+ loadsegment(gs, next->gs);
+
+ /*
+ * Now maybe reload the debug registers
+ */
+ if (next->debugreg[7]){
+ loaddebug(next, 0);
+ loaddebug(next, 1);
+ loaddebug(next, 2);
+ loaddebug(next, 3);
+ /* no 4 and 5 */
+ loaddebug(next, 6);
+ loaddebug(next, 7);
+ }
+
+}
diff --git a/xen/arch/i386/rwlock.c b/xen/arch/i386/rwlock.c
new file mode 100644
index 0000000000..3b9b689c8a
--- /dev/null
+++ b/xen/arch/i386/rwlock.c
@@ -0,0 +1,33 @@
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+
+#if defined(CONFIG_SMP)
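+/*
+ * Out-of-line slow paths for the inline read/write lock fast paths.
+ * The fast path has already adjusted the lock word, so each routine
+ * first undoes that adjustment, spins until the lock looks free, and
+ * retries. RW_LOCK_BIAS is the resting value meaning "no readers, no
+ * writers"; the lock address is passed in %eax.
+ */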
+asm(
+"
+.align 4
+.globl __write_lock_failed
+__write_lock_failed:
+ " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)
+1: rep; nop
+ cmpl $" RW_LOCK_BIAS_STR ",(%eax)
+ jne 1b
+
+ " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)
+ jnz __write_lock_failed
+ ret
+
+
+.align 4
+.globl __read_lock_failed
+__read_lock_failed:
+ lock ; incl (%eax)
+1: rep; nop
+ cmpl $1,(%eax)
+ js 1b
+
+ lock ; decl (%eax)
+ js __read_lock_failed
+ ret
+"
+);
+#endif
diff --git a/xen/arch/i386/setup.c b/xen/arch/i386/setup.c
new file mode 100644
index 0000000000..f4f62c2b02
--- /dev/null
+++ b/xen/arch/i386/setup.c
@@ -0,0 +1,375 @@
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+#include <asm/bitops.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+#include <asm/domain_page.h>
+
+struct cpuinfo_x86 boot_cpu_data = { 0 };
+/* Lots of nice things, since we only target PPro+. */
+unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
+unsigned long wait_init_idle;
+
+/* Basic page table for each CPU in the system. */
+l2_pgentry_t *idle_pg_table[NR_CPUS] = { idle0_pg_table };
+
+/* for asm/domain_page.h, map_domain_page() */
+unsigned long *mapcache[NR_CPUS];
+
+/* Standard routine to see if a specific EFLAGS flag is changeable. */
+static inline int flag_is_changeable_p(u32 flag)
+{
+ u32 f1, f2;
+
+ asm("pushfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl %2,%0\n\t"
+ "pushl %0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "popfl\n\t"
+ : "=&r" (f1), "=&r" (f2)
+ : "ir" (flag));
+
+ return ((f1^f2) & flag) != 0;
+}
+
+/* Probe for the CPUID instruction */
+static int __init have_cpuid_p(void)
+{
+ return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+ char *v = c->x86_vendor_id;
+
+ if (!strcmp(v, "GenuineIntel"))
+ c->x86_vendor = X86_VENDOR_INTEL;
+ else if (!strcmp(v, "AuthenticAMD"))
+ c->x86_vendor = X86_VENDOR_AMD;
+ else if (!strcmp(v, "CyrixInstead"))
+ c->x86_vendor = X86_VENDOR_CYRIX;
+ else if (!strcmp(v, "UMC UMC UMC "))
+ c->x86_vendor = X86_VENDOR_UMC;
+ else if (!strcmp(v, "CentaurHauls"))
+ c->x86_vendor = X86_VENDOR_CENTAUR;
+ else if (!strcmp(v, "NexGenDriven"))
+ c->x86_vendor = X86_VENDOR_NEXGEN;
+ else if (!strcmp(v, "RiseRiseRise"))
+ c->x86_vendor = X86_VENDOR_RISE;
+ else if (!strcmp(v, "GenuineTMx86") ||
+ !strcmp(v, "TransmetaCPU"))
+ c->x86_vendor = X86_VENDOR_TRANSMETA;
+ else
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+ /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
+ if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
+ clear_bit(X86_FEATURE_SEP, &c->x86_capability);
+}
+
+static void __init init_amd(struct cpuinfo_x86 *c)
+{
+ /* Bit 31 in normal CPUID is used for a nonstandard 3DNow! ID;
+ 3DNow! is identified by bit 31 in extended CPUID (1*32+31) anyway */
+ clear_bit(0*32+31, &c->x86_capability);
+
+ switch(c->x86)
+ {
+ case 5:
+ panic("AMD K6 is not supported.\n");
+ case 6: /* An Athlon/Duron. We can trust the BIOS probably */
+ break;
+ }
+}
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+ extern int opt_noht, opt_noacpi;
+ int junk, i;
+ u32 xlvl, tfms;
+
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+ c->cpuid_level = -1; /* CPUID not detected */
+ c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_vendor_id[0] = '\0'; /* Unset */
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+ if ( !have_cpuid_p() )
+ panic("Ancient processors not supported\n");
+
+ /* Get vendor name */
+ cpuid(0x00000000, &c->cpuid_level,
+ (int *)&c->x86_vendor_id[0],
+ (int *)&c->x86_vendor_id[8],
+ (int *)&c->x86_vendor_id[4]);
+
+ get_cpu_vendor(c);
+
+ if ( c->cpuid_level == 0 )
+ panic("Decrepit CPUID not supported\n");
+
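+ /* EAX from CPUID leaf 1 packs family in bits 11:8, model in
+ bits 7:4, and stepping ("mask") in bits 3:0. */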
+ cpuid(0x00000001, &tfms, &junk, &junk,
+ &c->x86_capability[0]);
+ c->x86 = (tfms >> 8) & 15;
+ c->x86_model = (tfms >> 4) & 15;
+ c->x86_mask = tfms & 15;
+
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+ if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+ if ( xlvl >= 0x80000001 )
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ }
+
+ /* Transmeta-defined flags: level 0x80860001 */
+ xlvl = cpuid_eax(0x80860000);
+ if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+ if ( xlvl >= 0x80860001 )
+ c->x86_capability[2] = cpuid_edx(0x80860001);
+ }
+
+ printk("CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
+ c->x86_capability[0],
+ c->x86_capability[1],
+ c->x86_capability[2],
+ c->x86_vendor);
+
+ switch ( c->x86_vendor ) {
+ case X86_VENDOR_INTEL:
+ init_intel(c);
+ break;
+ case X86_VENDOR_AMD:
+ init_amd(c);
+ break;
+ default:
+ panic("Only support Intel processors (P6+)\n");
+ }
+
+ if ( opt_noht )
+ {
+ opt_noacpi = 1; /* Virtual CPUs only appear in ACPI tables. */
+ clear_bit(X86_FEATURE_HT, &c->x86_capability[0]);
+ }
+
+ printk("CPU caps: %08x %08x %08x %08x\n",
+ c->x86_capability[0],
+ c->x86_capability[1],
+ c->x86_capability[2],
+ c->x86_capability[3]);
+
+ /*
+ * On SMP, boot_cpu_data holds the common feature set between
+ * all CPUs; so make sure that we indicate which features are
+ * common between the CPUs. The first time this routine gets
+ * executed, c == &boot_cpu_data.
+ */
+ if ( c != &boot_cpu_data ) {
+ /* AND the already accumulated flags with these */
+ for ( i = 0 ; i < NCAPINTS ; i++ )
+ boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+ }
+}
+
+
+unsigned long cpu_initialized;
+void __init cpu_init(void)
+{
+ int nr = smp_processor_id();
+ struct tss_struct * t = &init_tss[nr];
+ l2_pgentry_t *pl2e;
+
+ if ( test_and_set_bit(nr, &cpu_initialized) )
+ panic("CPU#%d already initialized!!!\n", nr);
+ printk("Initializing CPU#%d\n", nr);
+
+ /* Set up GDT and IDT. */
+ SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES);
+ SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS);
+ __asm__ __volatile__("lgdt %0": "=m" (*current->mm.gdt));
+ __asm__ __volatile__("lidt %0": "=m" (idt_descr));
+
+ /* No nested task. */
+ __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
+
+ /* Ensure FPU gets initialised for each domain. */
+ stts();
+
+ /* Set up and load the per-CPU TSS and LDT. */
+ t->ss0 = __HYPERVISOR_DS;
+ t->esp0 = current->thread.esp0;
+ set_tss_desc(nr,t);
+ load_TR(nr);
+ __asm__ __volatile__("lldt %%ax"::"a" (0));
+
+ /* Clear all 6 debug registers. */
+#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
+ CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
+#undef CD
+
+ /* Install correct page table. */
+ __asm__ __volatile__ ("movl %%eax,%%cr3"
+ : : "a" (pagetable_val(current->mm.pagetable)));
+
+ /* Set up mapping cache for domain pages. */
+ pl2e = idle_pg_table[nr] + (MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT);
+ mapcache[nr] = (unsigned long *)get_free_page(GFP_KERNEL);
+ clear_page(mapcache[nr]);
+ *pl2e = mk_l2_pgentry(__pa(mapcache[nr]) | PAGE_HYPERVISOR);
+}
+
+static void __init do_initcalls(void)
+{
+ initcall_t *call;
+
+ call = &__initcall_start;
+ do {
+ (*call)();
+ call++;
+ } while (call < &__initcall_end);
+}
+
+/*
+ * IBM-compatible BIOSes place drive info tables at initial interrupt
+ * vectors 0x41 and 0x46. These are in the form of 16-bit-mode far pointers.
+ */
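+/*
+ * Illustrative example of the far-pointer arithmetic below (made-up
+ * values): a vector entry of segment 0x9fc0, offset 0x0010 decodes to
+ * linear address (0x9fc0 << 4) + 0x0010 = 0x9fc10.
+ */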
+struct drive_info_struct { unsigned char dummy[32]; } drive_info;
+void get_bios_driveinfo(void)
+{
+ unsigned long seg, off, tab1, tab2;
+
+ off = (unsigned long)*(unsigned short *)(4*0x41+0);
+ seg = (unsigned long)*(unsigned short *)(4*0x41+2);
+ tab1 = (seg<<4) + off;
+
+ off = (unsigned long)*(unsigned short *)(4*0x46+0);
+ seg = (unsigned long)*(unsigned short *)(4*0x46+2);
+ tab2 = (seg<<4) + off;
+
+ printk("Reading BIOS drive-info tables at 0x%05lx and 0x%05lx\n",
+ tab1, tab2);
+
+ memcpy(drive_info.dummy+ 0, (char *)tab1, 16);
+ memcpy(drive_info.dummy+16, (char *)tab2, 16);
+}
+
+
+unsigned long pci_mem_start = 0x10000000;
+
+void __init start_of_day(void)
+{
+ extern void trap_init(void);
+ extern void init_IRQ(void);
+ extern void time_init(void);
+ extern void softirq_init(void);
+ extern void timer_bh(void);
+ extern void tqueue_bh(void);
+ extern void immediate_bh(void);
+ extern void init_timervecs(void);
+ extern void disable_pit(void);
+ extern void ac_timer_init(void);
+ extern int setup_network_devices(void);
+ extern void net_init(void);
+ extern void initialize_block_io(void);
+ extern void initialize_keytable(void);
+ extern void initialize_serial(void);
+ extern void initialize_keyboard(void);
+
+ unsigned long low_mem_size;
+
+ /*
+ * We do this early, but tables are in the lowest 1MB (usually
+ * 0xfe000-0xfffff). Therefore they're unlikely to ever get clobbered.
+ */
+ get_bios_driveinfo();
+
+ /* Tell the PCI layer not to allocate too close to the RAM area.. */
+ low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
+ if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size;
+
+ identify_cpu(&boot_cpu_data); /* get CPU type info */
+ if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR);
+ if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT);
+ find_smp_config(); /* find ACPI tables */
+ smp_alloc_memory(); /* trampoline which other CPUs jump at */
+ paging_init(); /* not much here now, but sets up fixmap */
+ if ( smp_found_config ) get_smp_config();
+ domain_init();
+ scheduler_init();
+ trap_init();
+ init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */
+ time_init(); /* installs software handler for HZ clock. */
+ softirq_init();
+ init_timervecs();
+ init_bh(TIMER_BH, timer_bh);
+ init_bh(TQUEUE_BH, tqueue_bh);
+ init_bh(IMMEDIATE_BH, immediate_bh);
+ init_apic_mappings(); /* make APICs addressable in our pagetables. */
+
+#ifndef CONFIG_SMP
+ APIC_init_uniprocessor();
+#else
+ smp_boot_cpus(); /*
+ * Does loads of stuff, including kicking the local
+ * APIC, and the IO APIC after other CPUs are booted.
+ * Each IRQ is preferably handled by IO-APIC, but
+ * fall thru to 8259A if we have to (but slower).
+ */
+#endif
+ initialize_keytable(); /* call back handling for key codes */
+
+ disable_pit(); /* not needed anymore */
+ ac_timer_init(); /* init accurate timers */
+ init_xeno_time(); /* initialise the time */
+ schedulers_start(); /* start scheduler for each CPU */
+
+ sti();
+
+ zap_low_mappings();
+ kmem_cache_init();
+ kmem_cache_sizes_init(max_page);
+#ifdef CONFIG_PCI
+ pci_init();
+#endif
+ do_initcalls();
+
+
+ initialize_serial(); /* setup serial 'driver' (for debugging) */
+ initialize_keyboard(); /* setup keyboard (also for debugging) */
+
+ if ( !setup_network_devices() )
+ panic("Must have a network device!\n");
+ net_init(); /* initializes virtual network system. */
+ initialize_block_io(); /* setup block devices */
+
+
+#ifdef CONFIG_SMP
+ wait_init_idle = cpu_online_map;
+ clear_bit(smp_processor_id(), &wait_init_idle);
+ smp_threads_ready = 1;
+ smp_commence(); /* Tell other CPUs that state of the world is stable. */
+ while (wait_init_idle)
+ {
+ cpu_relax();
+ barrier();
+ }
+#endif
+}
diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c
new file mode 100644
index 0000000000..008d1aa83a
--- /dev/null
+++ b/xen/arch/i386/smp.c
@@ -0,0 +1,578 @@
+/*
+ * Intel SMP support routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+
+#include <xeno/irq.h>
+#include <xeno/sched.h>
+#include <xeno/delay.h>
+#include <xeno/spinlock.h>
+#include <asm/smp.h>
+#include <asm/mc146818rtc.h>
+#include <asm/pgalloc.h>
+#include <asm/smpboot.h>
+
+/*
+ * Some notes on x86 processor bugs affecting SMP operation:
+ *
+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ * The Linux implications for SMP are handled as follows:
+ *
+ * Pentium III / [Xeon]
+ * None of the E1AP-E3AP errata are visible to the user.
+ *
+ * E1AP. see PII A1AP
+ * E2AP. see PII A2AP
+ * E3AP. see PII A3AP
+ *
+ * Pentium II / [Xeon]
+ * None of the A1AP-A3AP errata are visible to the user.
+ *
+ * A1AP. see PPro 1AP
+ * A2AP. see PPro 2AP
+ * A3AP. see PPro 7AP
+ *
+ * Pentium Pro
+ * None of 1AP-9AP errata are visible to the normal user,
+ * except occasional delivery of 'spurious interrupt' as trap #15.
+ * This is very rare and a non-problem.
+ *
+ * 1AP. Linux maps APIC as non-cacheable
+ * 2AP. worked around in hardware
+ * 3AP. fixed in C0 and above steppings microcode update.
+ * Linux does not use excessive STARTUP_IPIs.
+ * 4AP. worked around in hardware
+ * 5AP. symmetric IO mode (normal Linux operation) not affected.
+ * 'noapic' mode has vector 0xf filled out properly.
+ * 6AP. 'noapic' mode might be affected - fixed in later steppings
+ * 7AP. We do not assume writes to the LVT deasserting IRQs
+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
+ * 9AP. We do not use mixed mode
+ *
+ * Pentium
+ * There is a marginal case where REP MOVS on 100MHz SMP
+ * machines with B stepping processors can fail. XXX should provide
+ * an L1cache=Writethrough or L1cache=off option.
+ *
+ * B stepping CPUs may hang. There are hardware workarounds
+ * for this. We warn about it in case your board doesn't have the
+ * workarounds. Basically that's so I can tell anyone with a B stepping
+ * CPU and SMP problems "tough".
+ *
+ * Specific items [From Pentium Processor Specification Update]
+ *
+ * 1AP. Linux doesn't use remote read
+ * 2AP. Linux doesn't trust APIC errors
+ * 3AP. We work around this
+ * 4AP. Linux never generated 3 interrupts of the same priority
+ * to cause a lost local interrupt.
+ * 5AP. Remote read is never used
+ * 6AP. not affected - worked around in hardware
+ * 7AP. not affected - worked around in hardware
+ * 8AP. worked around in hardware - we get explicit CS errors if not
+ * 9AP. only 'noapic' mode affected. Might generate spurious
+ * interrupts, we log only the first one and count the
+ * rest silently.
+ * 10AP. not affected - worked around in hardware
+ * 11AP. Linux reads the APIC between writes to avoid this, as per
+ * the documentation. Make sure you preserve this as it affects
+ * the C stepping chips too.
+ * 12AP. not affected - worked around in hardware
+ * 13AP. not affected - worked around in hardware
+ * 14AP. we always deassert INIT during bootup
+ * 15AP. not affected - worked around in hardware
+ * 16AP. not affected - worked around in hardware
+ * 17AP. not affected - worked around in hardware
+ * 18AP. not affected - worked around in hardware
+ * 19AP. not affected - worked around in BIOS
+ *
+ * If this sounds worrying, believe me: these bugs are either ___RARE___
+ * or are signal timing bugs worked around in hardware, and there's
+ * nothing of note from C stepping upwards.
+ */
+
+/* The 'big kernel lock' */
+spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
+
+struct tlb_state cpu_tlbstate[NR_CPUS] = {[0 ... NR_CPUS-1] = { 0 }};
+
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+ return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+ return SET_APIC_DEST_FIELD(mask);
+}
+
+static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+ /*
+ * Subtle. In the case of the 'never do double writes' workaround
+ * we have to lock out interrupts to be safe. As we don't care
+ * about the value read, we use an atomic rmw access to avoid costly
+ * cli/sti. Otherwise we use an even cheaper single atomic write
+ * to the APIC.
+ */
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * No need to touch the target chip field
+ */
+ cfg = __prepare_ICR(shortcut, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+}
+
+void send_IPI_self(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+static inline void send_IPI_mask_bitmask(int mask, int vector)
+{
+ unsigned long cfg;
+ unsigned long flags;
+
+ __save_flags(flags);
+ __cli();
+
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(mask);
+ apic_write_around(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+
+ __restore_flags(flags);
+}
+
+static inline void send_IPI_mask_sequence(int mask, int vector)
+{
+ unsigned long cfg, flags;
+ unsigned int query_cpu, query_mask;
+
+ __save_flags(flags);
+ __cli();
+
+ for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
+ query_mask = 1 << query_cpu;
+ if (query_mask & mask) {
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
+ apic_write_around(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+ }
+ }
+ __restore_flags(flags);
+}
+
+static inline void send_IPI_mask(int mask, int vector)
+{
+ send_IPI_mask_bitmask(mask, vector);
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+ /*
+ * if there are no other CPUs in the system then
+ * we get an APIC send error if we try to broadcast.
+ * Thus we have to avoid sending IPIs in this case.
+ */
+ if (!(smp_num_cpus > 1))
+ return;
+
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+}
+
+static inline void send_IPI_all(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+}
+
+/*
+ * Smarter SMP flushing macros.
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+ * writing to user space from interrupts. (It's not allowed anyway.)
+ *
+ * Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static volatile unsigned long flush_cpumask;
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
+#define FLUSH_ALL 0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm.cpu_vm_mask.
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+ if (cpu_tlbstate[cpu].state == TLBSTATE_OK)
+ BUG();
+ clear_bit(cpu, &cpu_tlbstate[cpu].active_mm->cpu_vm_mask);
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask);
+ * Stop ipi delivery for the old mm. This is not synchronized with
+ * the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ * for the wrong mm, and in the worst case we perform a superfluous
+ * tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ * was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ * Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask);
+ * Now the other cpus will send tlb flush ipis.
+ * 1a5) change cr3.
+ * 1b) thread switch without mm change
+ * cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ * flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ * Atomically set the bit [other cpus will start sending flush ipis],
+ * and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ * runs in kernel space, the cpu could load tlb entries for user space
+ * pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+asmlinkage void smp_invalidate_interrupt (void)
+{
+ unsigned long cpu = smp_processor_id();
+
+ if (!test_bit(cpu, &flush_cpumask))
+ return;
+ /*
+ * This was a BUG() but until someone can quote me the
+ * line from the intel manual that guarantees an IPI to
+ * multiple CPUs is retried _only_ on the erroring CPUs
+ * it's staying as a return.
+ *
+ * BUG();
+ */
+
+ if (flush_mm == cpu_tlbstate[cpu].active_mm) {
+ if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
+ if (flush_va == FLUSH_ALL)
+ local_flush_tlb();
+ else
+ __flush_tlb_one(flush_va);
+ } else
+ leave_mm(cpu);
+ }
+ ack_APIC_irq();
+ clear_bit(cpu, &flush_cpumask);
+}
+
+static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
+ unsigned long va)
+{
+ /*
+ * A couple of (to be removed) sanity checks:
+ *
+ * - we do not send IPIs to not-yet booted CPUs.
+ * - current CPU must not be in mask
+ * - mask must exist :)
+ */
+ if (!cpumask)
+ BUG();
+ if ((cpumask & cpu_online_map) != cpumask)
+ BUG();
+ if (cpumask & (1 << smp_processor_id()))
+ BUG();
+ if (!mm)
+ BUG();
+
+ /*
+ * I'm not happy about this global shared spinlock in the
+ * MM hot path, but we'll see how contended it is.
+ * Temporarily this turns IRQs off, so that lockups are
+ * detected by the NMI watchdog.
+ */
+ spin_lock(&tlbstate_lock);
+
+ flush_mm = mm;
+ flush_va = va;
+ atomic_set_mask(cpumask, &flush_cpumask);
+ /*
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+ while (flush_cpumask)
+ /* nothing. lockup detection does not belong here */;
+
+ flush_mm = NULL;
+ flush_va = 0;
+ spin_unlock(&tlbstate_lock);
+}
+
+void flush_tlb_current_task(void)
+{
+ struct mm_struct *mm = &current->mm;
+ unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+ local_flush_tlb();
+ if (cpu_mask)
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+}
+
+void flush_tlb_mm (struct mm_struct * mm)
+{
+ unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+ if (current->active_mm == mm)
+ local_flush_tlb();
+ if (cpu_mask)
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+}
+
+#if 0
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+ if (current->active_mm == mm) {
+ if(current->mm)
+ __flush_tlb_one(va);
+ else
+ leave_mm(smp_processor_id());
+ }
+
+ if (cpu_mask)
+ flush_tlb_others(cpu_mask, mm, va);
+}
+#endif
+
+static inline void do_flush_tlb_all_local(void)
+{
+ unsigned long cpu = smp_processor_id();
+
+ __flush_tlb_all();
+ if (cpu_tlbstate[cpu].state == TLBSTATE_LAZY)
+ leave_mm(cpu);
+}
+
+static void flush_tlb_all_ipi(void* info)
+{
+ do_flush_tlb_all_local();
+}
+
+void flush_tlb_all(void)
+{
+ smp_call_function(flush_tlb_all_ipi, NULL, 1, 1);
+
+ do_flush_tlb_all_local();
+}
+
+void smp_send_event_check_mask(unsigned long cpu_mask)
+{
+ send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+
+struct call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ atomic_t started;
+ atomic_t finished;
+ int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>> or have already executed it.
+ *
+ * You must not call this function with disabled interrupts, from a
+ * hardware interrupt handler, or from bottom halves.
+ */
+{
+ struct call_data_struct data;
+ int cpus = smp_num_cpus-1;
+
+ if (!cpus)
+ return 0;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ spin_lock(&call_lock);
+ call_data = &data;
+ wmb();
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ barrier();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ barrier();
+
+ spin_unlock(&call_lock);
+
+ return 0;
+}
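+
+/*
+ * Hypothetical usage sketch (not an actual caller in this file): run a
+ * fast, non-blocking handler on all other CPUs and wait, so that any
+ * stack-based argument stays live until every remote CPU is done:
+ *
+ *     static void drain_foo(void *info) { ... }
+ *     ...
+ *     smp_call_function(drain_foo, NULL, 1, 1);
+ */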
+
+static void stop_this_cpu (void * dummy)
+{
+ /*
+ * Remove this CPU:
+ */
+ clear_bit(smp_processor_id(), &cpu_online_map);
+ __cli();
+ disable_local_APIC();
+ for(;;) __asm__("hlt");
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+ smp_call_function(stop_this_cpu, NULL, 1, 0);
+ smp_num_cpus = 1;
+
+ __cli();
+ disable_local_APIC();
+ __sti();
+}
+
+/*
+ * Nothing to do, as all the work is done automatically when
+ * we return from the interrupt.
+ */
+asmlinkage void smp_event_check_interrupt(void)
+{
+ ack_APIC_irq();
+}
+
+asmlinkage void smp_call_function_interrupt(void)
+{
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+ int wait = call_data->wait;
+
+ ack_APIC_irq();
+ /*
+ * Notify initiating CPU that I've grabbed the data and am
+ * about to execute the function
+ */
+ mb();
+ atomic_inc(&call_data->started);
+ /*
+ * At this point the info structure may be out of scope unless wait==1
+ */
+ (*func)(info);
+ if (wait) {
+ mb();
+ atomic_inc(&call_data->finished);
+ }
+}
+
diff --git a/xen/arch/i386/smpboot.c b/xen/arch/i386/smpboot.c
new file mode 100644
index 0000000000..0955db82f3
--- /dev/null
+++ b/xen/arch/i386/smpboot.c
@@ -0,0 +1,960 @@
+/*
+ * x86 SMP booting functions
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Much of the core SMP work is based on previous work by Thomas Radke, to
+ * whom a great many thanks are extended.
+ *
+ * Thanks to Intel for making available several different Pentium,
+ * Pentium Pro and Pentium-II/Xeon MP machines.
+ * Original development of Linux SMP code supported by Caldera.
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ *
+ * Fixes
+ * Felix Koop : NR_CPUS used properly
+ * Jose Renau : Handle single CPU case.
+ * Alan Cox : By repeated request 8) - Total BogoMIP report.
+ * Greg Wright : Fix for kernel stacks panic.
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Matthias Sattler : Changes for 2.1 kernel map.
+ * Michel Lespinasse : Changes for 2.1 kernel map.
+ * Michael Chastain : Change trampoline.S to gnu as.
+ * Alan Cox : Dumb bug: 'B' step PPro's are fine
+ * Ingo Molnar : Added APIC timers, based on code
+ * from Jose Renau
+ * Ingo Molnar : various cleanups and rewrites
+ * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs
+ * Martin J. Bligh : Added support for multi-quad systems
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+#include <asm/pgalloc.h>
+#include <asm/mc146818rtc.h>
+#include <asm/smpboot.h>
+#include <xeno/smp.h>
+#include <asm/msr.h>
+#include <asm/system.h>
+#include <xeno/sched.h>
+#include <xeno/delay.h>
+#include <xeno/lib.h>
+
+/* Set if we find a B stepping CPU */
+static int smp_b_stepping;
+
+/* Setup configured maximum number of CPUs to activate */
+static int max_cpus = -1;
+
+/* Total count of live CPUs */
+int smp_num_cpus = 1;
+
+/* Bitmask of currently online CPUs */
+unsigned long cpu_online_map;
+
+static volatile unsigned long cpu_callin_map;
+static volatile unsigned long cpu_callout_map;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end [];
+static unsigned char *trampoline_base;
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+ memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
+}
+
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ trampoline_base = __va(0x90000);
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+void __init smp_store_cpu_info(int id)
+{
+ struct cpuinfo_x86 *c = cpu_data + id;
+
+ *c = boot_cpu_data;
+ c->pte_quick = 0;
+ c->pmd_quick = 0;
+ c->pgd_quick = 0;
+ c->pgtable_cache_sz = 0;
+ identify_cpu(c);
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if (c->x86_vendor == X86_VENDOR_INTEL &&
+ c->x86 == 5 &&
+ c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_model <= 3)
+ /*
+ * Remember we have B step Pentia with bugs
+ */
+ smp_b_stepping = 1;
+}
+
+/*
+ * Architecture specific routine called by the kernel just before init is
+ * fired off. This allows the BP to have everything in order [we hope].
+ * At the end of this all the APs will hit the system scheduling and off
+ * we go. Each AP will load the system gdt's and jump through the kernel
+ * init into idle(). At this point the scheduler will one day take over
+ * and give them jobs to do. smp_callin is a standard routine
+ * we use to track CPUs as they power up.
+ */
+
+static atomic_t smp_commenced = ATOMIC_INIT(0);
+
+void __init smp_commence(void)
+{
+ /*
+ * Lets the callins below out of their loop.
+ */
+ Dprintk("Setting commenced=1, go go go\n");
+
+ wmb();
+ atomic_set(&smp_commenced,1);
+}
+
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSCs synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+/*
+ * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
+ * multiplication. Not terribly optimized but we need it at boot time only
+ * anyway.
+ *
+ * result == a / b
+ * == (a1 + a2*(2^32)) / b
+ * == a1/b + a2*(2^32/b)
+ * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
+ * ^---- (this multiplication can overflow)
+ */
+
+static unsigned long long div64 (unsigned long long a, unsigned long b0)
+{
+ unsigned int a1, a2;
+ unsigned long long res;
+
+ a1 = ((unsigned int*)&a)[0];
+ a2 = ((unsigned int*)&a)[1];
+
+ res = a1/b0 +
+ (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
+ a2 / b0 +
+ (a2 * (0xffffffff % b0)) / b0;
+
+ return res;
+}
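+
+/*
+ * Worked example of the decomposition above: a = 2*2^32 + 10, b0 = 5.
+ *   a1/b0 = 10/5 = 2
+ *   a2*((2^32-1)/b0) = 2*858993459 = 1717986918
+ *   a2/b0 = 2/5 = 0
+ *   (a2*((2^32-1) % b0))/b0 = (2*0)/5 = 0
+ * Sum = 1717986920, matching 8589934602/5 truncated.
+ */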
+
+static void __init synchronize_tsc_bp (void)
+{
+ int i;
+ unsigned long long t0;
+ unsigned long long sum, avg;
+ long long delta;
+ int buggy = 0;
+
+ printk("checking TSC synchronization across CPUs: ");
+
+ atomic_set(&tsc_start_flag, 1);
+ wmb();
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronized and
+ * the BP and APs set their cycle counters to zero all at
+ * once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+ for (i = 0; i < NR_LOOPS; i++) {
+ /*
+ * all APs synchronize but they loop on '== num_cpus'
+ */
+ while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
+ atomic_set(&tsc_count_stop, 0);
+ wmb();
+ /*
+ * this lets the APs save their current TSC:
+ */
+ atomic_inc(&tsc_count_start);
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ /*
+ * We clear the TSC in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ /*
+ * Wait for all APs to leave the synchronization point:
+ */
+ while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
+ atomic_set(&tsc_count_start, 0);
+ wmb();
+ atomic_inc(&tsc_count_stop);
+ }
+
+ sum = 0;
+ for (i = 0; i < smp_num_cpus; i++) {
+ t0 = tsc_values[i];
+ sum += t0;
+ }
+ avg = div64(sum, smp_num_cpus);
+
+ sum = 0;
+ for (i = 0; i < smp_num_cpus; i++) {
+ delta = tsc_values[i] - avg;
+ if (delta < 0)
+ delta = -delta;
+ /*
+ * We report clock differences bigger than 2 microseconds.
+ */
+ if (delta > 2*ticks_per_usec) {
+ long realdelta;
+ if (!buggy) {
+ buggy = 1;
+ printk("\n");
+ }
+ realdelta = div64(delta, ticks_per_usec);
+ if (tsc_values[i] < avg)
+ realdelta = -realdelta;
+
+ printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
+ i, realdelta);
+ }
+
+ sum += delta;
+ }
+ if (!buggy)
+ printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+ int i;
+
+ /*
+ * smp_num_cpus is not necessarily known at the time
+ * this gets called, so we first wait for the BP to
+ * finish SMP initialization:
+ */
+ while (!atomic_read(&tsc_start_flag)) mb();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&tsc_count_start);
+ while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
+ }
+}
+#undef NR_LOOPS
+
+static atomic_t init_deasserted;
+
+void __init smp_callin(void)
+{
+ int cpuid, phys_id, i;
+
+ /*
+ * If woken up by an INIT in an 82489DX configuration
+ * we may get here before an INIT-deassert IPI reaches
+ * our local APIC. We have to wait for the IPI or we'll
+ * lock up on an APIC access.
+ */
+ while (!atomic_read(&init_deasserted));
+
+ /*
+ * (This works even if the APIC is not enabled.)
+ */
+ phys_id = GET_APIC_ID(apic_read(APIC_ID));
+ cpuid = smp_processor_id();
+ if (test_and_set_bit(cpuid, &cpu_online_map)) {
+ printk("huh, phys CPU#%d, CPU#%d already present??\n",
+ phys_id, cpuid);
+ BUG();
+ }
+ Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+ /*
+ * STARTUP IPIs are fragile beasts as they might sometimes
+ * trigger some glue motherboard logic. We therefore keep the
+ * APIC bus silent for up to two seconds here, which generously
+ * overestimates the time the boot CPU spends sending the up to
+ * 2 STARTUP IPIs. This should be enough.
+ */
+
+ for ( i = 0; i < 200; i++ )
+ {
+ if ( test_bit(cpuid, &cpu_callout_map) ) break;
+ mdelay(10);
+ }
+
+ if (!test_bit(cpuid, &cpu_callout_map)) {
+ printk("BUG: CPU%d started up but did not get a callout!\n",
+ cpuid);
+ BUG();
+ }
+
+ /*
+ * the boot CPU has finished the init stage and is spinning
+ * on callin_map until we finish. We are free to set up this
+ * CPU, first the APIC. (this is probably redundant on most
+ * boards)
+ */
+
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+
+ setup_local_APIC();
+
+ __sti();
+
+#ifdef CONFIG_MTRR
+ /*
+ * Must be done before calibration delay is computed
+ */
+ mtrr_init_secondary_cpu ();
+#endif
+
+ Dprintk("Stack at about %p\n",&cpuid);
+
+ /*
+ * Save our processor parameters
+ */
+ smp_store_cpu_info(cpuid);
+
+ /*
+ * Allow the master to continue.
+ */
+ set_bit(cpuid, &cpu_callin_map);
+
+ /*
+ * Synchronize the TSC with the BP
+ */
+ synchronize_tsc_ap();
+}
+
+int cpucount;
+
+/*
+ * Activate a secondary processor.
+ */
+int __init start_secondary(void *unused)
+{
+ unsigned int cpu = smp_processor_id();
+ /* 6 bytes suitable for passing to LIDT instruction. */
+ unsigned char idt_load[6];
+
+ extern void cpu_init(void);
+
+ /*
+ * Don't put anything before smp_callin(); SMP
+ * booting is so fragile that we want to limit the
+ * things done here to the bare minimum.
+ */
+ cpu_init();
+ smp_callin();
+
+ while (!atomic_read(&smp_commenced))
+ rep_nop();
+
+ /*
+ * At this point, the boot CPU has fully initialised the IDT. It is
+ * now safe to make ourselves a private copy.
+ */
+ idt_tables[cpu] = kmalloc(IDT_ENTRIES*8, GFP_KERNEL);
+ memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8);
+ *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1;
+ *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
+ __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
+
+ /*
+ * Low-memory mappings have been cleared; flush them from the local
+ * TLBs too.
+ */
+ local_flush_tlb();
+
+ cpu_idle();
+ BUG();
+
+ return 0;
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPUs - they just need to reload everything
+ * from the task structure.
+ * This function must not return.
+ */
+void __init initialize_secondary(void)
+{
+ /*
+ * We don't actually need to load the full TSS,
+ * basically just the stack pointer and the eip.
+ */
+ asm volatile(
+ "movl %0,%%esp\n\t"
+ "jmp *%1"
+ :
+ :"r" (current->thread.esp),"r" (current->thread.eip));
+}
+
+extern struct {
+ void * esp;
+ unsigned short ss;
+} stack_start;
+
+/* which physical APIC ID maps to which logical CPU number */
+volatile int physical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which physical APIC ID */
+volatile int cpu_2_physical_apicid[NR_CPUS];
+
+/* which logical APIC ID maps to which logical CPU number */
+volatile int logical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which logical APIC ID */
+volatile int cpu_2_logical_apicid[NR_CPUS];
+
+static inline void init_cpu_to_apicid(void)
+/* Initialize all maps between cpu number and apicids */
+{
+ int apicid, cpu;
+
+ for (apicid = 0; apicid < MAX_APICID; apicid++) {
+ physical_apicid_2_cpu[apicid] = -1;
+ logical_apicid_2_cpu[apicid] = -1;
+ }
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ cpu_2_physical_apicid[cpu] = -1;
+ cpu_2_logical_apicid[cpu] = -1;
+ }
+}
+
+static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
+/*
+ * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+ physical_apicid_2_cpu[apicid] = cpu;
+ cpu_2_physical_apicid[cpu] = apicid;
+}
+
+static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
+/*
+ * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+ physical_apicid_2_cpu[apicid] = -1;
+ cpu_2_physical_apicid[cpu] = -1;
+}
+
+#if APIC_DEBUG
+static inline void inquire_remote_apic(int apicid)
+{
+ int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+ char *names[] = { "ID", "VERSION", "SPIV" };
+ int timeout, status;
+
+ printk("Inquiring remote APIC #%d...\n", apicid);
+
+ for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ printk("... APIC #%d %s: ", apicid, names[i]);
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+ timeout = 0;
+ do {
+ udelay(100);
+ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+ switch (status) {
+ case APIC_ICR_RR_VALID:
+ status = apic_read(APIC_RRR);
+ printk("%08x\n", status);
+ break;
+ default:
+ printk("failed\n");
+ }
+ }
+}
+#endif
+
+
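+/*
+ * Bring a secondary CPU out of reset with the MP-spec sequence used
+ * below: assert INIT, deassert it, then (for integrated APICs only)
+ * send up to two STARTUP IPIs whose vector field points at the
+ * page-aligned real-mode trampoline.
+ */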
+static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
+
+ Dprintk("Asserting INIT.\n");
+
+ /*
+ * Turn INIT on target chip
+ */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /*
+ * Send IPI
+ */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ mdelay(10);
+
+ Dprintk("Deasserting INIT.\n");
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Send IPI */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ atomic_set(&init_deasserted, 1);
+
+ /*
+ * Should we send STARTUP IPIs ?
+ *
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
+ num_starts = 2;
+ else
+ num_starts = 0;
+
+ /*
+ * Run STARTUP IPI loop.
+ */
+ Dprintk("#startup loops: %d.\n", num_starts);
+
+ maxlvt = get_maxlvt();
+
+ for (j = 1; j <= num_starts; j++) {
+ Dprintk("Sending STARTUP #%d.\n",j);
+
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ Dprintk("After apic_write.\n");
+
+ /*
+ * STARTUP IPI
+ */
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_STARTUP
+ | (start_eip >> 12));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ if (send_status || accept_status)
+ break;
+ }
+ Dprintk("After Startup.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+
+extern unsigned long cpu_initialized;
+
+static void __init do_boot_cpu (int apicid)
+/*
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ */
+{
+ struct task_struct *idle;
+ unsigned long boot_error = 0;
+ int timeout, cpu;
+ unsigned long start_eip;
+ l2_pgentry_t *pagetable;
+
+ cpu = ++cpucount;
+ /*
+ * We can't use kernel_thread since we must avoid rescheduling the child.
+ */
+ if ( (idle = do_newdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
+ panic("failed 'newdomain' for CPU %d", cpu);
+
+ pagetable = (void *)get_free_page(GFP_KERNEL);
+ memcpy(pagetable, idle0_pg_table, PAGE_SIZE);
+ idle_pg_table[cpu] = pagetable;
+ idle->mm.pagetable = mk_pagetable(__pa(pagetable));
+
+ map_cpu_to_boot_apicid(cpu, apicid);
+
+ idle->thread.esp = idle->thread.esp0 = (unsigned long)idle + THREAD_SIZE;
+ idle->thread.eip = (unsigned long) start_secondary;
+
+ SET_DEFAULT_FAST_TRAP(&idle->thread);
+
+ /* start_eip had better be page-aligned! */
+ start_eip = setup_trampoline();
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+ stack_start.esp = (void *) (1024+PAGE_SIZE+(char *)idle-__PAGE_OFFSET);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+ Dprintk("Setting warm reset code and vector.\n");
+
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ Dprintk("3.\n");
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if (APIC_INTEGRATED(apic_version[apicid])) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ /*
+ * Status is now clean
+ */
+ boot_error = 0;
+
+ /*
+ * Starting actual IPI sequence...
+ */
+
+ boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ set_bit(cpu, &cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (test_bit(cpu, &cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (test_bit(cpu, &cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ printk("CPU%d has booted.\n", cpu);
+ } else {
+ boot_error= 1;
+ if (*((volatile unsigned char *)phys_to_virt(8192))
+ == 0xA5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+ else
+ /* trampoline code not run */
+ printk("Not responding.\n");
+#if APIC_DEBUG
+ inquire_remote_apic(apicid);
+#endif
+ }
+ }
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_boot_apicid(cpu, apicid);
+ clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
+ clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+ clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */
+ cpucount--;
+ }
+
+ /* mark "stuck" area as not stuck */
+ *((volatile unsigned long *)phys_to_virt(8192)) = 0;
+}
+
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+static int boot_cpu_logical_apicid;
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio = NULL;
+
+void __init smp_boot_cpus(void)
+{
+ int apicid, bit;
+
+#ifdef CONFIG_MTRR
+ /* Must be done before other processors booted */
+ mtrr_init_boot_cpu ();
+#endif
+ /* Initialize the logical to physical CPU number mapping */
+ init_cpu_to_apicid();
+
+ /*
+ * Setup boot CPU information
+ */
+ smp_store_cpu_info(0); /* Final full version of the data */
+ printk("CPU%d booted\n", 0);
+
+ /*
+ * We have the boot CPU online for sure.
+ */
+ set_bit(0, &cpu_online_map);
+ boot_cpu_logical_apicid = logical_smp_processor_id();
+ map_cpu_to_boot_apicid(0, boot_cpu_apicid);
+
+ /*
+ * If we couldn't find an SMP configuration at boot time,
+ * get out of here now!
+ */
+ if (!smp_found_config) {
+ printk("SMP motherboard not detected.\n");
+ io_apic_irqs = 0;
+ cpu_online_map = phys_cpu_present_map = 1;
+ smp_num_cpus = 1;
+ if (APIC_init_uniprocessor())
+ printk("Local APIC not detected."
+ " Using dummy APIC emulation.\n");
+ goto smp_done;
+ }
+
+ /*
+ * Should not be necessary because the MP table should list the boot
+ * CPU too, but we do it for the sake of robustness anyway.
+ */
+ if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
+ printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+ boot_cpu_physical_apicid);
+ phys_cpu_present_map |= (1 << hard_smp_processor_id());
+ }
+
+ /*
+ * If we couldn't find a local APIC, then get out of here now!
+ */
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
+ !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
+ printk("BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+ io_apic_irqs = 0;
+ cpu_online_map = phys_cpu_present_map = 1;
+ smp_num_cpus = 1;
+ goto smp_done;
+ }
+
+ verify_local_APIC();
+
+ /*
+ * If SMP should be disabled, then really disable it!
+ */
+ if (!max_cpus) {
+ smp_found_config = 0;
+ printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+ io_apic_irqs = 0;
+ cpu_online_map = phys_cpu_present_map = 1;
+ smp_num_cpus = 1;
+ goto smp_done;
+ }
+
+ connect_bsp_APIC();
+ setup_local_APIC();
+
+ if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
+ BUG();
+
+ /*
+ * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+ *
+ * In clustered apic mode, phys_cpu_present_map is constructed thus:
+ * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
+ * clustered apic ID.
+ */
+ Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
+
+ for (bit = 0; bit < NR_CPUS; bit++) {
+ apicid = cpu_present_to_apicid(bit);
+ /*
+ * Don't even attempt to start the boot CPU!
+ */
+ if (apicid == boot_cpu_apicid)
+ continue;
+
+ if (!(phys_cpu_present_map & (1 << bit)))
+ continue;
+ if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
+ continue;
+
+ do_boot_cpu(apicid);
+
+ /*
+ * Make sure we unmap all failed CPUs
+ */
+ if ((boot_apicid_to_cpu(apicid) == -1) &&
+ (phys_cpu_present_map & (1 << bit)))
+ printk("CPU #%d not responding - cannot use it.\n",
+ apicid);
+ }
+
+ /*
+ * Cleanup possible dangling ends...
+ */
+ /*
+ * Install writable page 0 entry to set BIOS data area.
+ */
+ local_flush_tlb();
+
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
+ CMOS_WRITE(0, 0xf);
+
+ *((volatile long *) phys_to_virt(0x467)) = 0;
+
+ if (!cpucount) {
+ printk("Error: only one processor found.\n");
+ } else {
+ printk("Total of %d processors activated.\n", cpucount+1);
+ }
+ smp_num_cpus = cpucount + 1;
+
+ if (smp_b_stepping)
+ printk("WARNING: SMP operation may"
+ " be unreliable with B stepping processors.\n");
+ Dprintk("Boot done.\n");
+
+ /*
+ * Here we can be sure that there is an IO-APIC in the system. Let's
+ * go and set it up:
+ */
+ if ( nr_ioapics ) setup_IO_APIC();
+
+ /* Set up all local APIC timers in the system. */
+ setup_APIC_clocks();
+
+ /* Synchronize the TSC with the AP(s). */
+ if ( cpucount ) synchronize_tsc_bp();
+
+ smp_done:
+ ;
+}
diff --git a/xen/arch/i386/time.c b/xen/arch/i386/time.c
new file mode 100644
index 0000000000..773c4cfeb3
--- /dev/null
+++ b/xen/arch/i386/time.c
@@ -0,0 +1,434 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: i386/time.c
+ * Author:
+ * Changes:
+ *
+ * Date: Jan 2003
+ *
+ * Environment: Xen Hypervisor
+ * Description: modified version of Linux' time.c
+ * implements system and wall clock time.
+ * based on freebsd's implementation.
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+/*
+ * linux/arch/i386/kernel/time.c
+ *
+ * Copyright (C) 1991, 1992, 1995 Linus Torvalds
+ */
+
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/time.h>
+#include <xeno/ac_timer.h>
+
+#include <asm/io.h>
+#include <xeno/smp.h>
+#include <xeno/irq.h>
+#include <asm/msr.h>
+#include <asm/mpspec.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/mc146818rtc.h>
+
+#ifdef TIME_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+unsigned long cpu_khz; /* Detected as we calibrate the TSC */
+unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
+
+spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+
+int timer_ack=0;
+extern spinlock_t i8259A_lock;
+static inline void do_timer_interrupt(int irq,
+ void *dev_id, struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_IO_APIC
+ if (timer_ack) {
+ /*
+ * Subtle: when I/O APICs are used we have to ack the timer IRQ manually
+ * to reset the IRR bit for do_slow_gettimeoffset(). This will also
+ * deassert NMI lines for the watchdog if run on an 82489DX-based
+ * system.
+ */
+ spin_lock(&i8259A_lock);
+ outb(0x0c, 0x20);
+ /* Ack the IRQ; AEOI will end it automatically. */
+ inb(0x20);
+ spin_unlock(&i8259A_lock);
+ }
+#endif
+ do_timer(regs);
+}
+
+/*
+ * This is only temporary. Once the APIC is up and running, this
+ * timer interrupt is turned off.
+ */
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ do_timer_interrupt(irq, NULL, regs);
+}
+
+static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0,
+ "timer", NULL, NULL};
+
+/* ------ Calibrate the TSC -------
+ * Return processor ticks per second / CALIBRATE_FRAC.
+ */
+
+#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */
+#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
+#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
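+/*
+ * Worked example: the PIT input clock is 1193180 Hz, so CALIBRATE_LATCH
+ * = (1193180+10)/20 = 59659 PIT ticks, i.e. one full countdown of
+ * channel 2 takes 1/20 s = 50 ms.
+ */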
+
+static unsigned long __init calibrate_tsc(void)
+{
+ /* Set the Gate high, disable speaker */
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+ /*
+ * Now let's take care of CTC channel 2
+ *
+ * Set the Gate high, program CTC channel 2 for mode 0 (interrupt on
+ * terminal count), binary count, and load CALIBRATE_LATCH (LSB then MSB)
+ * to begin the countdown.
+ */
+ outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
+ outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */
+
+ {
+ unsigned long startlow, starthigh;
+ unsigned long endlow, endhigh;
+ unsigned long count;
+
+ rdtsc(startlow,starthigh);
+ count = 0;
+ do {
+ count++;
+ } while ((inb(0x61) & 0x20) == 0);
+ rdtsc(endlow,endhigh);
+
+ /* Error: ECTCNEVERSET */
+ if (count <= 1)
+ goto bad_ctc;
+
+ /* 64-bit subtract - gcc just messes up with long longs */
+ __asm__("subl %2,%0\n\t"
+ "sbbl %3,%1"
+ :"=a" (endlow), "=d" (endhigh)
+ :"g" (startlow), "g" (starthigh),
+ "0" (endlow), "1" (endhigh));
+
+ /* Error: ECPUTOOFAST */
+ if (endhigh)
+ goto bad_ctc;
+
+ return endlow;
+ }
+
+ /*
+ * The CTC wasn't reliable: we got a hit on the very first read, or the CPU
+ * was so fast/slow that the quotient wouldn't fit in 32 bits..
+ */
+ bad_ctc:
+ return 0;
+}
+
+/***************************************************************************
+ * CMOS Timer functions
+ ***************************************************************************/
+
+/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+ * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
+ *
+ * [For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.]
+ *
+ * This algorithm was first published by Gauss (I think).
+ *
+ * WARNING: this function will overflow on 2106-02-07 06:28:16 on
+ * machines where long is 32-bit! (However, as time_t is signed, we
+ * will already get problems at other places on 2038-01-19 03:14:08)
+ */
+static inline unsigned long
+mktime (unsigned int year, unsigned int mon,
+ unsigned int day, unsigned int hour,
+ unsigned int min, unsigned int sec)
+{
+ if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
+ mon += 12; /* Puts Feb last since it has leap day */
+ year -= 1;
+ }
+ return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+
+ year*365 - 719499
+ )*24 + hour /* now have hours */
+ )*60 + min /* now have minutes */
+ )*60 + sec; /* finally seconds */
+}
+
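+/*
+ * Sanity check (worked example): mktime(1970,1,1,0,0,0) should be 0.
+ * mon=1 becomes 11 and year becomes 1969; then
+ * 1969/4 - 1969/100 + 1969/400 + 367*11/12 + 1 = 492-19+4+336+1 = 814,
+ * 814 + 1969*365 = 719499, and 719499 - 719499 = 0 days, hence 0 seconds.
+ */
+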
+static unsigned long get_cmos_time(void)
+{
+ unsigned int year, mon, day, hour, min, sec;
+ int i;
+
+ spin_lock(&rtc_lock);
+ /* The Linux interpretation of the CMOS clock register contents:
+ * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
+ * RTC registers show the second which has precisely just started.
+ * Let's hope other operating systems interpret the RTC the same way.
+ */
+ /* read RTC exactly on falling edge of update flag */
+ for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
+ break;
+ for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
+ if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+ break;
+ do { /* Isn't this overkill ? UIP above should guarantee consistency */
+ sec = CMOS_READ(RTC_SECONDS);
+ min = CMOS_READ(RTC_MINUTES);
+ hour = CMOS_READ(RTC_HOURS);
+ day = CMOS_READ(RTC_DAY_OF_MONTH);
+ mon = CMOS_READ(RTC_MONTH);
+ year = CMOS_READ(RTC_YEAR);
+ } while (sec != CMOS_READ(RTC_SECONDS));
+ if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ {
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+ }
+ spin_unlock(&rtc_lock);
+ if ((year += 1900) < 1970)
+ year += 100;
+ printk(".... CMOS Clock: %02d/%02d/%04d %02d:%02d:%02d\n",
+ day, mon, year, hour, min, sec);
+ return mktime(year, mon, day, hour, min, sec);
+}
+
+/***************************************************************************
+ * Time
+ * XXX RN: Will be able to remove some of the locking once the time is
+ * updated by the APIC on only one CPU.
+ ***************************************************************************/
+
+static spinlock_t stime_lock;
+static u32 st_scale_f;
+static u32 st_scale_i;
+u32 stime_pcc; /* cycle counter value at last timer irq */
+s_time_t stime_now; /* time in ns at last timer IRQ */
+
+s_time_t get_s_time(void)
+{
+ unsigned long flags;
+ u32 delta_tsc, low, pcc;
+ u64 delta;
+ s_time_t now;
+
+ spin_lock_irqsave(&stime_lock, flags);
+
+ pcc = stime_pcc;
+ now = stime_now;
+
+ /* only use bottom 32bits of TSC. This should be sufficient */
+ rdtscl(low);
+ delta_tsc = low - pcc;
+ delta = ((u64)delta_tsc * st_scale_f);
+ delta >>= 32;
+ delta += ((u64)delta_tsc * st_scale_i);
+
+ spin_unlock_irqrestore(&stime_lock, flags);
+
+ return now + delta;
+}
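+
+/*
+ * The scale factors hold ns-per-TSC-tick as a 32.32 fixed-point value
+ * (scale = 2^32 * 10^9 / cpu_freq, set up in init_xeno_time()), so
+ * delta_ns = delta_tsc*st_scale_i + (((u64)delta_tsc*st_scale_f) >> 32).
+ * E.g. a hypothetical 2 GHz TSC gives 0.5 ns/tick: st_scale_i = 0,
+ * st_scale_f = 0x80000000.
+ */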
+
+
+/* Wall Clock time */
+static spinlock_t wctime_lock;
+struct timeval wall_clock_time; /* wall clock time at last update */
+s_time_t wctime_st; /* system time at last update */
+
+void do_gettimeofday(struct timeval *tv)
+{
+ unsigned long flags;
+ unsigned long usec, sec;
+
+ spin_lock_irqsave(&wctime_lock, flags);
+ usec = ((unsigned long)(NOW() - wctime_st))/1000;
+ sec = wall_clock_time.tv_sec;
+ usec += wall_clock_time.tv_usec;
+ spin_unlock_irqrestore(&wctime_lock, flags);
+
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ sec++;
+ }
+ tv->tv_sec = sec;
+ tv->tv_usec = usec;
+}
+
+void do_settimeofday(struct timeval *tv)
+{
+ printk("XXX: do_settimeofday not implemented\n");
+}
+
+/***************************************************************************
+ * Update times
+ ***************************************************************************/
+
+/* Update a domain's notion of time. */
+void update_dom_time(shared_info_t *si)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&stime_lock, flags);
+ si->system_time = stime_now;
+ si->st_timestamp = stime_pcc;
+ spin_unlock_irqrestore(&stime_lock, flags);
+
+ spin_lock_irqsave(&wctime_lock, flags);
+ si->tv_sec = wall_clock_time.tv_sec;
+ si->tv_usec = wall_clock_time.tv_usec;
+ si->wc_timestamp = wctime_st;
+ si->wc_version++;
+ spin_unlock_irqrestore(&wctime_lock, flags);
+
+ TRC(printk(" 0x%08X%08X\n", (u32)(wctime_st>>32), (u32)wctime_st));
+}
+
+/*
+ * Update the hypervisor's notion of time.
+ * This is done periodically on its own timer.
+ */
+static struct ac_timer update_timer;
+static void update_time(unsigned long foo)
+{
+ unsigned long flags;
+ u32 new_pcc;
+ s_time_t new_st;
+ unsigned long usec;
+
+ new_st = NOW();
+ rdtscl(new_pcc);
+
+ /* Update system time. */
+ spin_lock_irqsave(&stime_lock, flags);
+ stime_now = new_st;
+ stime_pcc = new_pcc;
+ /* Don't re-enable IRQs until we release wctime_lock. */
+ spin_unlock(&stime_lock);
+
+ /* Update wall clock time. */
+ spin_lock(&wctime_lock);
+ usec = ((unsigned long)(new_st - wctime_st))/1000;
+ usec += wall_clock_time.tv_usec;
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ wall_clock_time.tv_sec++;
+ }
+ wall_clock_time.tv_usec = usec;
+ wctime_st = new_st;
+ spin_unlock_irqrestore(&wctime_lock, flags);
+
+ TRC(printk("TIME[%02d] update time: stime_now=%lld now=%lld,wct=%ld:%ld\n",
+ smp_processor_id(), stime_now, new_st, wall_clock_time.tv_sec,
+ wall_clock_time.tv_usec));
+
+ /* Reload the timer. */
+ again:
+ update_timer.expires = new_st + MILLISECS(200);
+ if(add_ac_timer(&update_timer) == 1)
+ goto again;
+}
+
+/***************************************************************************
+ * Init Xeno Time
+ * This has to be done after all CPUs have been booted
+ ***************************************************************************/
+int __init init_xeno_time()
+{
+ int cpu = smp_processor_id();
+ u32 cpu_cycle; /* time of one CPU cycle in picoseconds */
+ u64 scale; /* scale factor */
+
+ spin_lock_init(&stime_lock);
+ spin_lock_init(&wctime_lock);
+
+ printk("Init Time[%02d]:\n", cpu);
+
+ /* System Time */
+ cpu_cycle = (u32) (1000000000LL/cpu_khz); /* in picoseconds */
+ scale = 1000000000LL << 32;
+ scale /= cpu_freq;
+ st_scale_f = scale & 0xffffffff;
+ st_scale_i = scale >> 32;
+
+ /* Wall Clock time */
+ wall_clock_time.tv_sec = get_cmos_time();
+ wall_clock_time.tv_usec = 0;
+
+ /* set starting times */
+ stime_now = (s_time_t)0;
+ rdtscl(stime_pcc);
+ wctime_st = NOW();
+
+ /* start timer to update time periodically */
+ init_ac_timer(&update_timer);
+ update_timer.function = &update_time;
+ update_time(0);
+
+ printk(".... System Time: %lldns\n", NOW());
+ printk(".....cpu_cycle: %u ps\n", cpu_cycle);
+ printk(".... st_scale_f: %X\n", st_scale_f);
+ printk(".... st_scale_i: %X\n", st_scale_i);
+ printk(".... stime_pcc: %u\n", stime_pcc);
+
+ printk(".... Wall Clock: %lds %ldus\n", wall_clock_time.tv_sec,
+ wall_clock_time.tv_usec);
+ printk(".... wctime_st: %lld\n", wctime_st);
+
+ return 0;
+}
+
+
+/***************************************************************************
+ * Init
+ ***************************************************************************/
+
+void __init time_init(void)
+{
+ unsigned long ticks_per_frac = calibrate_tsc();
+
+ if ( !ticks_per_frac )
+ panic("Error calibrating TSC\n");
+
+ ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC);
+ cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
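+ /*
+ * Worked example: a (hypothetical) 500 MHz CPU counts 25,000,000 TSC
+ * ticks in the 50 ms calibration window, giving ticks_per_usec =
+ * 25000000/50000 = 500 and cpu_khz = 25000000/50 = 500000.
+ */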
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+
+ setup_irq(0, &irq0);
+}
diff --git a/xen/arch/i386/trampoline.S b/xen/arch/i386/trampoline.S
new file mode 100644
index 0000000000..f0beef725a
--- /dev/null
+++ b/xen/arch/i386/trampoline.S
@@ -0,0 +1,54 @@
+/*
+ *
+ * Trampoline.S Derived from Setup.S by Linus Torvalds
+ *
+ * 4 Jan 1997 Michael Chastain: changed to gnu as.
+ *
+ * Entry: CS:IP point to the start of our code; we are
+ * in real mode with no stack, and the rest of the
+ * trampoline page is free for our stack and anything
+ * else we need.
+ *
+ * On entry to trampoline_data, the processor is in real mode
+ * with 16-bit addressing and 16-bit data. CS has some value
+ * and IP is zero. Thus, data addresses need to be absolute
+ * (no relocation) and are taken with regard to r_base.
+ */
+
+#include <xeno/config.h>
+#include <asm/page.h>
+
+.data
+
+.code16
+
+ENTRY(trampoline_data)
+r_base = .
+ mov %cs, %ax # Code and data in the same place
+ mov %ax, %ds
+
+ movl $0xA5A5A5A5, %ebx # Flag an SMP trampoline
+ cli # We should be safe anyway
+
+ movl $0xA5A5A5A5, trampoline_data - r_base
+
+ lidt idt_48 - r_base # load idt with 0, 0
+ lgdt gdt_48 - r_base # load gdt with whatever is appropriate
+
+ xor %ax, %ax
+ inc %ax # protected mode (PE) bit
+ lmsw %ax # into protected mode
+ jmp flush_instr
+flush_instr:
+ ljmpl $__HYPERVISOR_CS, $(MONITOR_BASE)-__PAGE_OFFSET
+
+idt_48:
+ .word 0 # idt limit = 0
+ .word 0, 0 # idt base = 0L
+
+gdt_48:
+ .word 0x0800 # gdt limit = 2048, 256 GDT entries
+ .long gdt_table-__PAGE_OFFSET # gdt base = gdt (first SMP CPU)
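+
+# A GDT pseudo-descriptor is a 16-bit limit followed by a 32-bit base:
+# the 0x0800 limit covers 256 eight-byte descriptors (2048 bytes), and
+# the base must be a physical address since paging is not yet enabled,
+# hence the -__PAGE_OFFSET adjustment.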
+
+.globl SYMBOL_NAME(trampoline_end)
+SYMBOL_NAME_LABEL(trampoline_end)
diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c
new file mode 100644
index 0000000000..5fe0858ba3
--- /dev/null
+++ b/xen/arch/i386/traps.c
@@ -0,0 +1,696 @@
+/*
+ * linux/arch/i386/traps.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <asm/ptrace.h>
+#include <xeno/delay.h>
+#include <xeno/spinlock.h>
+#include <xeno/irq.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/desc.h>
+#include <asm/debugreg.h>
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+
+#define GTBF_TRAP 1
+#define GTBF_TRAP_NOCODE 2
+#define GTBF_TRAP_CR2 4
+struct guest_trap_bounce {
+ unsigned long error_code; /* 0 */
+ unsigned long cr2; /* 4 */
+ unsigned short flags; /* 8 */
+ unsigned short cs; /* 10 */
+ unsigned long eip; /* 12 */
+} guest_trap_bounce[NR_CPUS] = { { 0 } };
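+
+/*
+ * The byte offsets noted above (0/4/8/10/12) suggest this layout is also
+ * consumed from assembly (presumably entry.S), so the fields must not be
+ * reordered.
+ */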
+
+asmlinkage int hypervisor_call(void);
+asmlinkage void lcall7(void);
+asmlinkage void lcall27(void);
+
+/* Master table, and the one used by CPU0. */
+struct desc_struct idt_table[256] = { {0, 0}, };
+/* All other CPUs have their own copy. */
+struct desc_struct *idt_tables[NR_CPUS] = { 0 };
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void machine_check(void);
+
+int kstack_depth_to_print = 8*20;
+
+static inline int kernel_text_address(unsigned long addr)
+{
+ if (addr >= (unsigned long) &_stext &&
+ addr <= (unsigned long) &_etext)
+ return 1;
+ return 0;
+
+}
+
+void show_trace(unsigned long * stack)
+{
+ int i;
+ unsigned long addr;
+
+ if (!stack)
+ stack = (unsigned long*)&stack;
+
+ printk("Call Trace: ");
+ i = 1;
+ while (((long) stack & (THREAD_SIZE-1)) != 0) {
+ addr = *stack++;
+ if (kernel_text_address(addr)) {
+ if (i && ((i % 6) == 0))
+ printk("\n ");
+ printk("[<%08lx>] ", addr);
+ i++;
+ }
+ }
+ printk("\n");
+}
+
+void show_trace_task(struct task_struct *tsk)
+{
+ unsigned long esp = tsk->thread.esp;
+
+ /* User space on another CPU? */
+ if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
+ return;
+ show_trace((unsigned long *)esp);
+}
+
+void show_stack(unsigned long * esp)
+{
+ unsigned long *stack;
+ int i;
+
+ // debugging aid: "show_stack(NULL);" prints the
+ // back trace for this cpu.
+
+ if(esp==NULL)
+ esp=(unsigned long*)&esp;
+
+ printk("Stack trace from ESP=%p:\n", esp);
+
+ stack = esp;
+ for(i=0; i < kstack_depth_to_print; i++) {
+ if (((long) stack & (THREAD_SIZE-1)) == 0)
+ break;
+ if (i && ((i % 8) == 0))
+ printk("\n ");
+ if ( kernel_text_address(*stack) )
+ printk("[%08lx] ", *stack++);
+ else
+ printk("%08lx ", *stack++);
+ }
+ printk("\n");
+ //show_trace(esp);
+}
+
+void show_registers(struct pt_regs *regs)
+{
+ unsigned long esp;
+ unsigned short ss;
+
+ esp = (unsigned long) (&regs->esp);
+ ss = __HYPERVISOR_DS;
+ if ( regs->xcs & 3 )
+ {
+ esp = regs->esp;
+ ss = regs->xss & 0xffff;
+ }
+
+ printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n",
+ smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags);
+ printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk("ds: %04x es: %04x ss: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff, ss);
+
+ show_stack(&regs->esp);
+}
+
+
+spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+ spin_lock_irq(&die_lock);
+ printk("%s: %04lx,%04lx\n", str, err >> 16, err & 0xffff);
+ show_registers(regs);
+ spin_unlock_irq(&die_lock);
+ panic("HYPERVISOR DEATH!!\n");
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+ if (!(3 & regs->xcs)) die(str, regs, err);
+}
+
+static void inline do_trap(int trapnr, char *str,
+ struct pt_regs * regs,
+ long error_code, int use_error_code)
+{
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ trap_info_t *ti;
+ unsigned long addr, fixup;
+
+ if (!(regs->xcs & 3))
+ goto fault_in_hypervisor;
+
+ ti = current->thread.traps + trapnr;
+ if ( trapnr == 14 )
+ {
+ /* page fault pushes %cr2 */
+ gtb->flags = GTBF_TRAP_CR2;
+ __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (gtb->cr2) : );
+ }
+ else
+ {
+ gtb->flags = use_error_code ? GTBF_TRAP : GTBF_TRAP_NOCODE;
+ }
+ gtb->error_code = error_code;
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ return;
+
+ fault_in_hypervisor:
+
+ if ( (fixup = search_exception_table(regs->eip)) != 0 )
+ {
+ regs->eip = fixup;
+ return;
+ }
+
+ __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
+
+ if ( (trapnr == 14) && (addr >= PAGE_OFFSET) )
+ {
+ unsigned long page;
+ unsigned long *pde;
+ pde = (unsigned long *)idle_pg_table[smp_processor_id()];
+ page = pde[addr >> L2_PAGETABLE_SHIFT];
+ printk("*pde = %08lx\n", page);
+ if ( page & _PAGE_PRESENT )
+ {
+ page &= PAGE_MASK;
+ page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
+ printk(" *pte = %08lx\n", page);
+ }
+ }
+
+ show_registers(regs);
+ panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
+ "[error_code=%08x]\n"
+ "Faulting linear address might be %08lx\n",
+ smp_processor_id(), trapnr, str,
+ error_code, addr);
+}
+
+#define DO_ERROR_NOCODE(trapnr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+do_trap(trapnr, str, regs, error_code, 0); \
+}
+
+#define DO_ERROR(trapnr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+do_trap(trapnr, str, regs, error_code, 1); \
+}
+
+DO_ERROR_NOCODE( 0, "divide error", divide_error)
+DO_ERROR_NOCODE( 3, "int3", int3)
+DO_ERROR_NOCODE( 4, "overflow", overflow)
+DO_ERROR_NOCODE( 5, "bounds", bounds)
+DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
+DO_ERROR_NOCODE( 7, "device not available", device_not_available)
+DO_ERROR( 8, "double fault", double_fault)
+DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, "invalid TSS", invalid_TSS)
+DO_ERROR(11, "segment not present", segment_not_present)
+DO_ERROR(12, "stack segment", stack_segment)
+DO_ERROR(14, "page fault", page_fault)
+/* Vector 15 reserved by Intel */
+DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
+DO_ERROR(17, "alignment check", alignment_check)
+DO_ERROR_NOCODE(18, "machine check", machine_check)
+DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
+
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ trap_info_t *ti;
+ unsigned long fixup;
+
+ /* Fatal if the fault occurred in ring 0, or resulted from an interrupt. */
+ if (!(regs->xcs & 3) || (error_code & 1))
+ goto gp_in_kernel;
+
+ /*
+ * Cunning trick to allow arbitrary "INT n" handling.
+ *
+ * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
+ * instruction from trapping to the appropriate vector, when that might not
+ * be expected by Xen or the guest OS. For example, that entry might be for
+ * a fault handler (unlike traps, faults don't increment EIP), or might
+ * expect an error code on the stack (which a software trap never
+ * provides), or might be a hardware interrupt handler that doesn't like
+ * being called spuriously.
+ *
+ * Instead, a GPF occurs with the faulting IDT vector in the error code.
+ * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
+ * clear to indicate that it's a software fault, not hardware.
+ *
+ * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
+ * okay because they can only be triggered by an explicit DPL-checked
+ * instruction. The DPL specified by the guest OS for these vectors is NOT
+ * CHECKED!!
+ */
+ if ( (error_code & 3) == 2 )
+ {
+ /* This fault must be due to an <INT n> instruction. */
+ ti = current->thread.traps + (error_code>>3);
+ if ( ti->dpl >= (regs->xcs & 3) )
+ {
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ regs->eip += 2;
+ return;
+ }
+ }
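+
+ /*
+ * Worked example: a guest 'int $0x80' hitting a DPL-0 IDT entry faults
+ * with error_code = (0x80 << 3) | 2 = 0x402: bit 1 flags an IDT-related
+ * fault, error_code>>3 recovers vector 0x80, and 'regs->eip += 2' steps
+ * over the two-byte INT instruction before bouncing to the guest.
+ */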
+
+ /* Pass on GPF as is. */
+ ti = current->thread.traps + 13;
+ gtb->flags = GTBF_TRAP;
+ gtb->error_code = error_code;
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ return;
+
+ gp_in_kernel:
+ if ( (fixup = search_exception_table(regs->eip)) != 0 )
+ {
+ regs->eip = fixup;
+ return;
+ }
+
+ die("general protection fault", regs, error_code);
+}
+
+static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+ printk("You probably have a hardware problem with your RAM chips\n");
+
+ /* Clear and disable the memory parity error line. */
+ reason = (reason & 0xf) | 4;
+ outb(reason, 0x61);
+}
+
+static void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+ unsigned long i;
+
+ printk("NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+ reason = (reason & 0xf) | 8;
+ outb(reason, 0x61);
+ i = 2000;
+ while (--i) udelay(1000);
+ reason &= ~8;
+ outb(reason, 0x61);
+}
+
+static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
+ printk("Dazed and confused, but trying to continue\n");
+ printk("Do you have a strange power saving mode enabled?\n");
+}
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+ unsigned char reason = inb(0x61);
+
+ if (!(reason & 0xc0)) {
+ unknown_nmi_error(reason, regs);
+ return;
+ }
+ if (reason & 0x80)
+ mem_parity_error(reason, regs);
+ if (reason & 0x40)
+ io_check_error(reason, regs);
+ /*
+ * Reassert NMI in case it became active meanwhile
+ * as it's edge-triggered.
+ */
+ outb(0x8f, 0x70);
+ inb(0x71); /* dummy */
+ outb(0x0f, 0x70);
+ inb(0x71); /* dummy */
+}
+
+asmlinkage void math_state_restore(struct pt_regs *regs, long error_code)
+{
+ /* Prevent recursion. */
+ clts();
+
+ if ( !(current->flags & PF_USEDFPU) )
+ {
+ if ( current->flags & PF_DONEFPUINIT )
+ restore_fpu(current);
+ else
+ init_fpu();
+ current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */
+ }
+
+ if ( current->flags & PF_GUEST_STTS )
+ {
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->cs = current->thread.traps[7].cs;
+ gtb->eip = current->thread.traps[7].address;
+ current->flags &= ~PF_GUEST_STTS;
+ }
+}
+
+
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+ unsigned int condition;
+ struct task_struct *tsk = current;
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+
+ __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
+
+ /* Mask out spurious debug traps due to lazy DR7 setting */
+ if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
+ (tsk->thread.debugreg[7] == 0) )
+ {
+ __asm__("movl %0,%%db7" : : "r" (0));
+ return;
+ }
+
+ if ( (regs->xcs & 3) == 0 )
+ {
+ /* Clear TF just for absolute sanity. */
+ regs->eflags &= ~EF_TF;
+ /*
+ * Basically, we ignore watchpoints when they trigger in
+ * the hypervisor. This may happen when a buffer is passed
+ * to us which previously had a watchpoint set on it.
+ * No need to bump EIP; the only faulting trap is an
+ * instruction breakpoint, which can't happen to us.
+ */
+ return;
+ }
+
+ /* Save debug status register where guest OS can peek at it */
+ tsk->thread.debugreg[6] = condition;
+
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->cs = tsk->thread.traps[1].cs;
+ gtb->eip = tsk->thread.traps[1].address;
+}
+
+
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
+ long error_code)
+{ /* nothing */ }
+
+
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+ "movw %4,%%dx\n\t" \
+ "movl %%eax,%0\n\t" \
+ "movl %%edx,%1" \
+ :"=m" (*((long *) (gate_addr))), \
+ "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
+ :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
+ "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
+} while (0)
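+
+/*
+ * Gate 'type' values below follow the IA-32 descriptor encoding: 14 is a
+ * 32-bit interrupt gate (IF cleared on entry), 15 a 32-bit trap gate (IF
+ * unchanged), 12 a 32-bit call gate; 0x8000 sets the Present bit, and
+ * 'dpl' selects the privilege level allowed to invoke the gate.
+ */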
+
+
+/*
+ * This needs to use 'idt_table' rather than 'idt', and
+ * thus use the _nonmapped_ version of the IDT, as the
+ * Pentium F0 0F bugfix can have resulted in the mapped
+ * IDT being write-protected.
+ */
+void set_intr_gate(unsigned int n, void *addr)
+{
+ _set_gate(idt_table+n,14,0,addr);
+}
+
+static void __init set_trap_gate(unsigned int n, void *addr)
+{
+ _set_gate(idt_table+n,15,0,addr);
+}
+
+static void __init set_system_gate(unsigned int n, void *addr)
+{
+ _set_gate(idt_table+n,15,3,addr);
+}
+
+static void __init set_call_gate(void *a, void *addr)
+{
+ _set_gate(a,12,3,addr);
+}
+
+#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
+ *((gate_addr)+1) = ((base) & 0xff000000) | \
+ (((base) & 0x00ff0000)>>16) | \
+ ((limit) & 0xf0000) | \
+ ((dpl)<<13) | \
+ (0x00408000) | \
+ ((type)<<8); \
+ *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
+ ((limit) & 0x0ffff); }
+
+#define _set_tssldt_desc(n,addr,limit,type) \
+__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
+ "movw %%ax,2(%2)\n\t" \
+ "rorl $16,%%eax\n\t" \
+ "movb %%al,4(%2)\n\t" \
+ "movb %4,5(%2)\n\t" \
+ "movb $0,6(%2)\n\t" \
+ "movb %%ah,7(%2)\n\t" \
+ "rorl $16,%%eax" \
+ : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))
+
+void set_tss_desc(unsigned int n, void *addr)
+{
+ _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89);
+}
+
+void __init trap_init(void)
+{
+ set_trap_gate(0,&divide_error);
+ set_trap_gate(1,&debug);
+ set_intr_gate(2,&nmi);
+ set_system_gate(3,&int3); /* usable from all privilege levels */
+ set_system_gate(4,&overflow); /* usable from all privilege levels */
+ set_trap_gate(5,&bounds);
+ set_trap_gate(6,&invalid_op);
+ set_trap_gate(7,&device_not_available);
+ set_trap_gate(8,&double_fault);
+ set_trap_gate(9,&coprocessor_segment_overrun);
+ set_trap_gate(10,&invalid_TSS);
+ set_trap_gate(11,&segment_not_present);
+ set_trap_gate(12,&stack_segment);
+ set_trap_gate(13,&general_protection);
+ set_intr_gate(14,&page_fault);
+ set_trap_gate(15,&spurious_interrupt_bug);
+ set_trap_gate(16,&coprocessor_error);
+ set_trap_gate(17,&alignment_check);
+ set_trap_gate(18,&machine_check);
+ set_trap_gate(19,&simd_coprocessor_error);
+
+ /* Only ring 1 can access monitor services. */
+ _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,15,1,&hypervisor_call);
+
+ /* CPU0 uses the master IDT. */
+ idt_tables[0] = idt_table;
+
+ /*
+ * Should be a barrier for any external CPU state.
+ */
+ {
+ extern void cpu_init(void);
+ cpu_init();
+ }
+}
+
+
+long do_set_trap_table(trap_info_t *traps)
+{
+ trap_info_t cur;
+ trap_info_t *dst = current->thread.traps;
+
+ /*
+ * I'm removing the next line, since it seems more intuitive to use this
+ * as an interface to incrementally update a domain's trap table. Clearing
+ * out old entries automatically is rather antisocial!
+ */
+ /*memset(dst, 0, sizeof(*dst) * 256);*/
+
+ for ( ; ; )
+ {
+ if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
+ if ( (cur.cs & 3) == 0 ) return -EPERM;
+ if ( cur.address == 0 ) break;
+ memcpy(dst+cur.vector, &cur, sizeof(cur));
+ traps++;
+ }
+
+ return(0);
+}
+
+
+long do_set_fast_trap(int idx)
+{
+ trap_info_t *ti;
+
+ /* Index 0 is special: it disables fast traps. */
+ if ( idx == 0 )
+ {
+ CLEAR_FAST_TRAP(&current->thread);
+ SET_DEFAULT_FAST_TRAP(&current->thread);
+ return 0;
+ }
+
+ /*
+ * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
+ * The former range is used by Windows and MS-DOS.
+ * Vector 0x80 is used by Linux and the BSD variants.
+ */
+ if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) ) return -1;
+
+ ti = current->thread.traps + idx;
+
+ CLEAR_FAST_TRAP(&current->thread);
+
+ current->thread.fast_trap_idx = idx;
+ current->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
+ current->thread.fast_trap_desc.b =
+ (ti->address & 0xffff0000) | 0x8f00 | (ti->dpl&3)<<13;
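+ /*
+ * The two words above hand-build an IDT trap-gate descriptor: .a holds
+ * the selector (bits 16-31) and offset bits 0-15; .b holds offset bits
+ * 16-31, with 0x8f00 setting Present and type 0xF (32-bit trap gate)
+ * and the guest's DPL in bits 13-14.
+ */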
+
+ SET_FAST_TRAP(&current->thread);
+
+ return 0;
+}
+
+
+long do_fpu_taskswitch(void)
+{
+ current->flags |= PF_GUEST_STTS;
+ stts();
+ return 0;
+}
+
+
+long do_set_debugreg(int reg, unsigned long value)
+{
+ int i;
+
+ switch ( reg )
+ {
+ case 0:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ __asm__ ( "movl %0, %%db0" : : "r" (value) );
+ break;
+ case 1:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ __asm__ ( "movl %0, %%db1" : : "r" (value) );
+ break;
+ case 2:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ __asm__ ( "movl %0, %%db2" : : "r" (value) );
+ break;
+ case 3:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ __asm__ ( "movl %0, %%db3" : : "r" (value) );
+ break;
+ case 6:
+ /*
+ * DR6: Bits 4-11,16-31 reserved (set to 1).
+ * Bit 12 reserved (set to 0).
+ */
+ value &= 0xffffefff; /* reserved bits => 0 */
+ value |= 0xffff0ff0; /* reserved bits => 1 */
+ __asm__ ( "movl %0, %%db6" : : "r" (value) );
+ break;
+ case 7:
+ /*
+ * DR7: Bit 10 reserved (set to 1).
+ * Bits 11-12,14-15 reserved (set to 0).
+ * Privileged bits:
+ * GD (bit 13): must be 0.
+ * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
+ * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
+ */
+ /* DR7 == 0 => debugging disabled for this domain. */
+ if ( value != 0 )
+ {
+ value &= 0xffff27ff; /* reserved bits => 0 */
+ value |= 0x00000400; /* reserved bits => 1 */
+ if ( (value & (1<<13)) != 0 ) return -EPERM;
+ for ( i = 0; i < 16; i += 2 )
+ if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
+ }
+ __asm__ ( "movl %0, %%db7" : : "r" (value) );
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ current->thread.debugreg[reg] = value;
+ return 0;
+}
+
+unsigned long do_get_debugreg(int reg)
+{
+ if ( (reg < 0) || (reg > 7) ) return -EINVAL;
+ return current->thread.debugreg[reg];
+}
diff --git a/xen/arch/i386/usercopy.c b/xen/arch/i386/usercopy.c
new file mode 100644
index 0000000000..56322f1b56
--- /dev/null
+++ b/xen/arch/i386/usercopy.c
@@ -0,0 +1,190 @@
+/*
+ * User address space access functions.
+ * The non inlined parts of asm-i386/uaccess.h are here.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ */
+#include <linux/config.h>
+#include <asm/uaccess.h>
+//#include <asm/mmx.h>
+
+#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_WRITE, to, n))
+ {
+ if(n<512)
+ __copy_user(to,from,n);
+ else
+ mmx_copy_user(to,from,n);
+ }
+ return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_READ, from, n))
+ {
+ if(n<512)
+ __copy_user_zeroing(to,from,n);
+ else
+ mmx_copy_user_zeroing(to, from, n);
+ }
+ else
+ memset(to, 0, n);
+ return n;
+}
+
+#else
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ prefetch(from);
+ if (access_ok(VERIFY_WRITE, to, n))
+ __copy_user(to,from,n);
+ return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ prefetchw(to);
+ if (access_ok(VERIFY_READ, from, n))
+ __copy_user_zeroing(to,from,n);
+ else
+ memset(to, 0, n);
+ return n;
+}
+
+#endif
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+#define __do_strncpy_from_user(dst,src,count,res) \
+do { \
+ int __d0, __d1, __d2; \
+ __asm__ __volatile__( \
+ " testl %1,%1\n" \
+ " jz 2f\n" \
+ "0: lodsb\n" \
+ " stosb\n" \
+ " testb %%al,%%al\n" \
+ " jz 1f\n" \
+ " decl %1\n" \
+ " jnz 0b\n" \
+ "1: subl %1,%0\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %5,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ ".previous" \
+ : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
+ "=&D" (__d2) \
+ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+ : "memory"); \
+} while (0)
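+
+/*
+ * The .fixup/__ex_table pair implements the usual fault-recovery scheme:
+ * '.long 0b,3b' maps the potentially-faulting 'lodsb' at label 0 to the
+ * fixup code at label 3; on a fault, search_exception_table() (used by
+ * the trap handlers in traps.c) redirects EIP there.
+ */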
+
+long
+__strncpy_from_user(char *dst, const char *src, long count)
+{
+ long res;
+ __do_strncpy_from_user(dst, src, count, res);
+ return res;
+}
+
+long
+strncpy_from_user(char *dst, const char *src, long count)
+{
+ long res = -EFAULT;
+ if (access_ok(VERIFY_READ, src, 1))
+ __do_strncpy_from_user(dst, src, count, res);
+ return res;
+}
+
+
+/*
+ * Zero Userspace
+ */
+
+#define __do_clear_user(addr,size) \
+do { \
+ int __d0; \
+ __asm__ __volatile__( \
+ "0: rep; stosl\n" \
+ " movl %2,%0\n" \
+ "1: rep; stosb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: lea 0(%2,%0,4),%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,2b\n" \
+ ".previous" \
+ : "=&c"(size), "=&D" (__d0) \
+ : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
+} while (0)
+
+unsigned long
+clear_user(void *to, unsigned long n)
+{
+ if (access_ok(VERIFY_WRITE, to, n))
+ __do_clear_user(to, n);
+ return n;
+}
+
+unsigned long
+__clear_user(void *to, unsigned long n)
+{
+ __do_clear_user(to, n);
+ return n;
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+
+long strnlen_user(const char *s, long n)
+{
+ unsigned long mask = -__addr_ok(s);
+ unsigned long res, tmp;
+
+ __asm__ __volatile__(
+ " testl %0, %0\n"
+ " jz 3f\n"
+ " andl %0,%%ecx\n"
+ "0: repne; scasb\n"
+ " setne %%al\n"
+ " subl %%ecx,%0\n"
+ " addl %0,%%eax\n"
+ "1:\n"
+ ".section .fixup,\"ax\"\n"
+ "2: xorl %%eax,%%eax\n"
+ " jmp 1b\n"
+ "3: movb $1,%%al\n"
+ " jmp 1b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 0b,2b\n"
+ ".previous"
+ :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
+ :"0" (n), "1" (s), "2" (0), "3" (mask)
+ :"cc");
+ return res & mask;
+}
diff --git a/xen/arch/i386/xeno.lds b/xen/arch/i386/xeno.lds
new file mode 100644
index 0000000000..5947ebada5
--- /dev/null
+++ b/xen/arch/i386/xeno.lds
@@ -0,0 +1,87 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(start)
+SECTIONS
+{
+ . = 0xFC400000 + 0x100000;
+ _text = .; /* Text and read-only data */
+ .text : {
+ *(.text)
+ *(.fixup)
+ *(.gnu.warning)
+ } = 0x9090
+ .text.lock : { *(.text.lock) } /* out-of-line lock text */
+
+ _etext = .; /* End of text section */
+
+ .rodata : { *(.rodata) *(.rodata.*) }
+ .kstrtab : { *(.kstrtab) }
+
+ . = ALIGN(16); /* Exception table */
+ __start___ex_table = .;
+ __ex_table : { *(__ex_table) }
+ __stop___ex_table = .;
+
+ __start___ksymtab = .; /* Kernel symbol table */
+ __ksymtab : { *(__ksymtab) }
+ __stop___ksymtab = .;
+
+ __start___kallsyms = .; /* All kernel symbols */
+ __kallsyms : { *(__kallsyms) }
+ __stop___kallsyms = .;
+
+ .data : { /* Data */
+ *(.data)
+ CONSTRUCTORS
+ }
+
+ _edata = .; /* End of data section */
+
+ . = ALIGN(8192); /* init_task */
+ .data.init_task : { *(.data.init_task) }
+
+ . = ALIGN(4096); /* Init code and data */
+ __init_begin = .;
+ .text.init : { *(.text.init) }
+ .data.init : { *(.data.init) }
+ . = ALIGN(16);
+ __setup_start = .;
+ .setup.init : { *(.setup.init) }
+ __setup_end = .;
+ __initcall_start = .;
+ .initcall.init : { *(.initcall.init) }
+ __initcall_end = .;
+ . = ALIGN(4096);
+ __init_end = .;
+
+ . = ALIGN(4096);
+ .data.page_aligned : { *(.data.idt) }
+
+ . = ALIGN(32);
+ .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+ __bss_start = .; /* BSS */
+ .bss : {
+ *(.bss)
+ }
+ _end = . ;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.text.exit)
+ *(.data.exit)
+ *(.exitcall.exit)
+ }
+
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+}
diff --git a/xen/common/Makefile b/xen/common/Makefile
new file mode 100644
index 0000000000..12f1f7d2e9
--- /dev/null
+++ b/xen/common/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o common.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/common/ac_timer.c b/xen/common/ac_timer.c
new file mode 100644
index 0000000000..8f65ff7093
--- /dev/null
+++ b/xen/common/ac_timer.c
@@ -0,0 +1,335 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: ac_timer.c
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: Accurate timer for the Hypervisor
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/config.h>
+#include <xeno/smp.h>
+#include <xeno/init.h>
+
+#include <xeno/time.h>
+#include <xeno/ac_timer.h>
+#include <xeno/keyhandler.h>
+
+#include <asm/system.h>
+#include <asm/desc.h>
+
+
+#undef AC_TIMER_TRACE
+#undef AC_TIMER_STATS
+
+#ifdef AC_TIMER_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+/*
+ * We pull handlers off the timer list this far in future,
+ * rather than reprogramming the time hardware.
+ */
+#define TIMER_SLOP (50*1000) /* ns */
+
+/* A timer list per CPU */
+typedef struct ac_timers_st
+{
+ spinlock_t lock;
+ struct list_head timers;
+ struct ac_timer *prev, *curr;
+} __cacheline_aligned ac_timers_t;
+static ac_timers_t ac_timers[NR_CPUS];
+
+#ifdef AC_TIMER_STATS
+#define BUCKETS 1000
+#define MAX_STATS
+typedef struct act_stats_st
+{
+ u32 count;
+ u32 times[2*(BUCKETS)];
+} __cacheline_aligned act_stats_t;
+static act_stats_t act_stats[NR_CPUS];
+
+#endif
+
+/* local prototypes */
+static int detach_ac_timer(struct ac_timer *timer);
+/*static void ac_timer_debug(unsigned long);*/
+
+/*
+ * add a timer.
+ * return value:
+ * 0: success
+ * 1: failure, timer in the past or timeout value too small
+ * -1: failure, timer uninitialised
+ */
+int add_ac_timer(struct ac_timer *timer)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ s_time_t now;
+
+ /* make sure timeout value is in the future */
+ now = NOW();
+ TRC(printk("ACT [%02d] add(): now=%lld timo=%lld\n",
+ cpu, now, timer->expires));
+ if (timer->expires <= now) {
+ printk("ACT[%02d] add_ac_timer: now=0x%08X%08X > expire=0x%08X%08X\n",
+ cpu, (u32)(now>>32), (u32)now,
+ (u32)(timer->expires>>32), (u32)timer->expires);
+ return 1;
+ }
+ spin_lock_irqsave(&ac_timers[cpu].lock, flags);
+ /*
+ * Add timer to the list. If it gets added to the front we have to
+ * reprogram the timer hardware.
+ */
+ if (list_empty(&ac_timers[cpu].timers)) {
+ /* Reprogram and add to head of list */
+ if (!reprogram_ac_timer(timer->expires)) {
+ /* failed */
+ printk("ACT [%02d] add(): add at head failed\n", cpu);
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ return 1;
+ }
+ list_add(&timer->timer_list, &ac_timers[cpu].timers);
+ TRC(printk("ACT [%02d] add(0x%08X%08X): added at head\n", cpu,
+ (u32)(timer->expires>>32), (u32)timer->expires));
+ } else {
+ struct list_head *pos;
+ struct ac_timer *t;
+ for (pos = ac_timers[cpu].timers.next;
+ pos != &ac_timers[cpu].timers;
+ pos = pos->next) {
+ t = list_entry(pos, struct ac_timer, timer_list);
+ if (t->expires > timer->expires)
+ break;
+ }
+
+ if (pos->prev == &ac_timers[cpu].timers) {
+ /* added to head, reprogram timer */
+ if (!reprogram_ac_timer(timer->expires)) {
+ /* failed */
+ TRC(printk("ACT [%02d] add(): add at head failed\n", cpu));
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ return 1;
+ }
+ list_add (&(timer->timer_list), pos->prev);
+ TRC(printk("ACT [%02d] add(0x%08X%08X): added at head\n", cpu,
+ (u32)(timer->expires>>32), (u32)timer->expires));
+ } else {
+ list_add (&(timer->timer_list), pos->prev);
+ TRC(printk("ACT [%02d] add(0x%08X%08X): add < exp=0x%08X%08X\n",
+ cpu,
+ (u32)(timer->expires>>32), (u32)timer->expires,
+ (u32)(t->expires>>32), (u32)t->expires));
+ }
+ }
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ return 0;
+}
+
+/*
+ * remove a timer
+ * return values:
+ * 0: success
+ * -1: bogus timer
+ */
+static int detach_ac_timer(struct ac_timer *timer)
+{
+ TRC(int cpu = smp_processor_id());
+ TRC(printk("ACT [%02d] detach(): \n", cpu));
+ list_del(&timer->timer_list);
+ timer->timer_list.next = NULL;
+ return 0;
+}
+
+/*
+ * remove a timer
+ * return values:
+ * 0: success
+ * -1: bogus timer
+ */
+int rem_ac_timer(struct ac_timer *timer)
+{
+ int cpu = smp_processor_id();
+ int res;
+ unsigned long flags;
+
+ TRC(printk("ACT [%02d] remove(): timo=%lld \n", cpu, timer->expires));
+
+ spin_lock_irqsave(&ac_timers[cpu].lock, flags);
+ res = detach_ac_timer(timer);
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+
+ return res;
+}
+
+/*
+ * modify a timer, i.e., set a new timeout value
+ * return value:
+ * 0: success
+ * -1: error
+ */
+int mod_ac_timer(struct ac_timer *timer, s_time_t new_time)
+{
+ if (rem_ac_timer(timer) != 0)
+ return -1;
+ timer->expires = new_time;
+ if (add_ac_timer(timer) != 0)
+ return -1;
+ return 0;
+}
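+
+/*
+ * Typical usage sketch (modelled on update_time() in arch/i386/time.c;
+ * 'my_handler' is a placeholder name):
+ *
+ * init_ac_timer(&t);
+ * t.function = my_handler;
+ * t.expires = NOW() + MILLISECS(200);
+ * if (add_ac_timer(&t) == 1)
+ * ... deadline already passed: recompute t.expires and retry ...
+ */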
+
+/*
+ * do_ac_timer
+ * deal with timeouts and run the handlers
+ */
+void do_ac_timer(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ struct ac_timer *t;
+
+ spin_lock_irqsave(&ac_timers[cpu].lock, flags);
+
+ do_timer_again:
+
+ TRC(printk("ACT [%02d] do(): now=%lld\n", cpu, NOW()));
+
+ /* Sanity: is the timer list empty? */
+ if ( list_empty(&ac_timers[cpu].timers) )
+ printk("ACT[%02d] do_ac_timer(): timer irq without timer\n", cpu);
+
+#ifdef AC_TIMER_STATS
+ if ( !list_empty(&ac_timers[cpu].timers) )
+ {
+ s32 diff;
+ u32 i;
+ /* Measure against the timer at the head of the list. */
+ t = list_entry(ac_timers[cpu].timers.next,
+ struct ac_timer, timer_list);
+ diff = ((s32)(NOW() - t->expires)) / 1000; /* delta in us */
+ if (diff < -BUCKETS)
+ diff = -BUCKETS;
+ else if (diff > BUCKETS)
+ diff = BUCKETS;
+ act_stats[cpu].times[diff+BUCKETS]++;
+ act_stats[cpu].count++;
+
+ if (act_stats[cpu].count >= 5000) {
+ printk("ACT Stats\n");
+ for (i=0; i < 2*BUCKETS; i++) {
+ if (act_stats[cpu].times[i] != 0)
+ printk("ACT [%02d]: %3dus: %5d\n",
+ cpu,i-BUCKETS, act_stats[cpu].times[i]);
+ act_stats[cpu].times[i]=0;
+ }
+ act_stats[cpu].count = 0;
+ printk("\n");
+ }
+ }
+#endif
+
+ /* Handle all timeouts in the near future. */
+ while ( !list_empty(&ac_timers[cpu].timers) )
+ {
+ t = list_entry(ac_timers[cpu].timers.next,
+ struct ac_timer, timer_list);
+ if ( t->expires > (NOW() + TIMER_SLOP) ) break;
+ detach_ac_timer(t);
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ if ( t->function != NULL ) t->function(t->data);
+ spin_lock_irqsave(&ac_timers[cpu].lock, flags);
+ }
+
+ /* If list not empty then reprogram timer to new head of list */
+ if ( !list_empty(&ac_timers[cpu].timers) )
+ {
+ t = list_entry(ac_timers[cpu].timers.next,
+ struct ac_timer, timer_list);
+ if ( t->expires > 0 )
+ {
+ TRC(printk("ACT [%02d] do(): reprog timo=%lld\n",cpu,t->expires));
+ if ( !reprogram_ac_timer(t->expires) )
+ {
+ TRC(printk("ACT [%02d] do(): again\n", cpu));
+ goto do_timer_again;
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ TRC(printk("ACT [%02d] do(): end\n", cpu));
+}
+
+/*
+ * debug dump_queue
+ * arguments: queue head, name of queue
+ */
+static void dump_tqueue(struct list_head *queue, char *name)
+{
+ struct list_head *list;
+ int loop = 0;
+ struct ac_timer *t;
+
+ printk ("QUEUE %s %lx n: %lx, p: %lx\n", name, (unsigned long)queue,
+ (unsigned long) queue->next, (unsigned long) queue->prev);
+ list_for_each (list, queue) {
+ t = list_entry(list, struct ac_timer, timer_list);
+ printk (" %s %d : %lx ex=0x%08X%08X %lu n: %lx, p: %lx\n",
+ name, loop++,
+ (unsigned long)list,
+ (u32)(t->expires>>32), (u32)t->expires, t->data,
+ (unsigned long)list->next, (unsigned long)list->prev);
+ }
+ return;
+}
+
+
+static void dump_timerq(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ u_long flags;
+ s_time_t now = NOW();
+
+ printk("Dumping ac_timer queues for cpu 0: NOW=0x%08X%08X\n",
+ (u32)(now>>32), (u32)now);
+
+ spin_lock_irqsave(&ac_timers[0].lock, flags);
+ dump_tqueue(&ac_timers[0].timers, "ac_time");
+ spin_unlock_irqrestore(&ac_timers[0].lock, flags);
+ printk("\n");
+ return;
+}
+
+
+void __init ac_timer_init(void)
+{
+ int i;
+
+ printk ("ACT: Initialising Accurate timers\n");
+
+ for (i = 0; i < NR_CPUS; i++)
+ {
+ INIT_LIST_HEAD(&ac_timers[i].timers);
+ spin_lock_init(&ac_timers[i].lock);
+ }
+
+ add_key_handler('a', dump_timerq, "dump ac_timer queues");
+}
diff --git a/xen/common/block.c b/xen/common/block.c
new file mode 100644
index 0000000000..851b3b544c
--- /dev/null
+++ b/xen/common/block.c
@@ -0,0 +1,22 @@
+/* block.c
+ *
+ * ring data structures for buffering messages between the hypervisor and
+ * guest OSes.
+ *
+ */
+
+#include <hypervisor-ifs/block.h>
+#include <xeno/lib.h>
+
+/*
+ * create_block_ring
+ *
+ * domain:
+ *
+ * allocates space for a particular domain's block io ring.
+ */
+blk_ring_t *create_block_ring(int domain)
+{
+ printk ("XEN create block ring <not implemented>");
+ return (blk_ring_t *)NULL;
+}
diff --git a/xen/common/brlock.c b/xen/common/brlock.c
new file mode 100644
index 0000000000..e2bccec6a7
--- /dev/null
+++ b/xen/common/brlock.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * linux/lib/brlock.c
+ *
+ * 'Big Reader' read-write spinlocks. See linux/brlock.h for details.
+ *
+ * Copyright 2000, Ingo Molnar <mingo@redhat.com>
+ * Copyright 2000, David S. Miller <davem@redhat.com>
+ */
+
+#include <linux/config.h>
+
+#ifdef CONFIG_SMP
+
+#include <linux/sched.h>
+#include <linux/brlock.h>
+
+#ifdef __BRLOCK_USE_ATOMICS
+
+brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
+ { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = RW_LOCK_UNLOCKED } };
+
+void __br_write_lock (enum brlock_indices idx)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; i++)
+ write_lock(&__brlock_array[cpu_logical_map(i)][idx]);
+}
+
+void __br_write_unlock (enum brlock_indices idx)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; i++)
+ write_unlock(&__brlock_array[cpu_logical_map(i)][idx]);
+}
+
+#else /* ! __BRLOCK_USE_ATOMICS */
+
+brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
+ { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = 0 } };
+
+struct br_wrlock __br_write_locks[__BR_IDX_MAX] =
+ { [0 ... __BR_IDX_MAX-1] = { SPIN_LOCK_UNLOCKED } };
+
+void __br_write_lock (enum brlock_indices idx)
+{
+ int i;
+
+again:
+ spin_lock(&__br_write_locks[idx].lock);
+ for (i = 0; i < smp_num_cpus; i++)
+ if (__brlock_array[cpu_logical_map(i)][idx] != 0) {
+ spin_unlock(&__br_write_locks[idx].lock);
+ barrier();
+ cpu_relax();
+ goto again;
+ }
+}
+
+void __br_write_unlock (enum brlock_indices idx)
+{
+ spin_unlock(&__br_write_locks[idx].lock);
+}
+
+#endif /* __BRLOCK_USE_ATOMICS */
+
+#endif /* CONFIG_SMP */
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
new file mode 100644
index 0000000000..1d43f641ba
--- /dev/null
+++ b/xen/common/dom0_ops.c
@@ -0,0 +1,150 @@
+/******************************************************************************
+ * dom0_ops.c
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/dom0_ops.h>
+#include <xeno/sched.h>
+#include <xeno/event.h>
+#include <asm/domain_page.h>
+
+extern unsigned int alloc_new_dom_mem(struct task_struct *, unsigned int);
+
+static unsigned int get_domnr(void)
+{
+ struct task_struct *p = &idle0_task;
+ unsigned long dom_mask = 0;
+ read_lock_irq(&tasklist_lock);
+ do {
+ if ( is_idle_task(p) ) continue;
+ set_bit(p->domain, &dom_mask);
+ }
+ while ( (p = p->next_task) != &idle0_task );
+ read_unlock_irq(&tasklist_lock);
+ return (dom_mask == ~0UL) ? 0 : ffz(dom_mask);
+}
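+
+/*
+ * get_domnr() returns the lowest unused domain id: each live task sets
+ * its id's bit in dom_mask and ffz() finds the first zero bit. A return
+ * of 0 means all ids are taken; DOM0_NEWDOMAIN below treats 0 as an
+ * allocation failure.
+ */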
+
+static void build_page_list(struct task_struct *p)
+{
+ unsigned long *list;
+ unsigned long curr;
+ struct list_head *list_ent;
+
+ curr = list_entry(p->pg_head.next, struct pfn_info, list) - frame_table;
+ list = (unsigned long *)map_domain_mem(curr << PAGE_SHIFT);
+
+ list_for_each(list_ent, &p->pg_head)
+ {
+ *list++ = list_entry(list_ent, struct pfn_info, list) - frame_table;
+
+ if( ((unsigned long)list & ~PAGE_MASK) == 0 )
+ {
+ struct list_head *ent = frame_table[curr].list.next;
+ curr = list_entry(ent, struct pfn_info, list) - frame_table;
+ unmap_domain_mem(list-1);
+ list = (unsigned long *)map_domain_mem(curr << PAGE_SHIFT);
+ }
+ }
+
+ unmap_domain_mem(list);
+}
+
+long do_dom0_op(dom0_op_t *u_dom0_op)
+{
+ long ret = 0;
+ dom0_op_t op;
+
+ if ( current->domain != 0 )
+ return -EPERM;
+
+ if ( copy_from_user(&op, u_dom0_op, sizeof(op)) )
+ return -EFAULT;
+
+ switch ( op.cmd )
+ {
+
+ case DOM0_STARTDOM:
+ {
+ struct task_struct * p = find_domain_by_id(op.u.meminfo.domain);
+ if ( (ret = final_setup_guestos(p, &op.u.meminfo)) != 0 )
+ {
+ p->state = TASK_DYING;
+ release_task(p);
+ break;
+ }
+ wake_up(p);
+ reschedule(p);
+ ret = p->domain;
+ }
+ break;
+
+ case DOM0_NEWDOMAIN:
+ {
+ struct task_struct *p;
+ static unsigned int pro = 0;
+ unsigned int dom = get_domnr();
+ ret = -ENOMEM;
+ if ( dom == 0 ) break;
+ pro = (pro+1) % smp_num_cpus;
+ p = do_newdomain(dom, pro);
+ if ( p == NULL ) break;
+
+ ret = alloc_new_dom_mem(p, op.u.newdomain.memory_kb);
+ if ( ret != 0 ) break;
+
+ build_page_list(p);
+
+ ret = p->domain;
+
+ op.u.newdomain.domain = ret;
+ op.u.newdomain.pg_head =
+ list_entry(p->pg_head.next, struct pfn_info, list) -
+ frame_table;
+ copy_to_user(u_dom0_op, &op, sizeof(op));
+ }
+ break;
+
+ case DOM0_KILLDOMAIN:
+ {
+ unsigned int dom = op.u.killdomain.domain;
+ if ( dom == IDLE_DOMAIN_ID )
+ {
+ ret = -EPERM;
+ }
+ else
+ {
+ ret = kill_other_domain(dom);
+ }
+ }
+ break;
+
+ case DOM0_GETMEMLIST:
+ {
+ int i;
+ unsigned long pfn = op.u.getmemlist.start_pfn;
+ unsigned long *buffer = op.u.getmemlist.buffer;
+ struct list_head *list_ent;
+
+ for ( i = 0; i < op.u.getmemlist.num_pfns; i++ )
+ {
+ /* XXX We trust DOM0 to give us a safe buffer. XXX */
+ *buffer++ = pfn;
+ list_ent = frame_table[pfn].list.next;
+ pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
+ }
+ }
+ break;
+
+ default:
+ ret = -ENOSYS;
+
+ }
+
+ return ret;
+}
diff --git a/xen/common/domain.c b/xen/common/domain.c
new file mode 100644
index 0000000000..5e862ada6d
--- /dev/null
+++ b/xen/common/domain.c
@@ -0,0 +1,590 @@
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/mm.h>
+#include <xeno/skbuff.h>
+#include <xeno/interrupt.h>
+#include <xeno/delay.h>
+#include <xeno/event.h>
+#include <xeno/time.h>
+#include <xeno/dom0_ops.h>
+#include <asm/io.h>
+#include <asm/domain_page.h>
+#include <asm/flushtlb.h>
+#include <asm/msr.h>
+#include <xeno/multiboot.h>
+#include <xeno/blkdev.h>
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
+
+extern int nr_mods;
+extern module_t *mod;
+extern unsigned char *cmdline;
+
+rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
+
+/*
+ * create a new domain
+ */
+struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu)
+{
+ int retval;
+ struct task_struct *p = NULL;
+ unsigned long flags;
+
+ retval = -ENOMEM;
+ p = alloc_task_struct();
+ if (!p) goto newdomain_out;
+ memset(p, 0, sizeof(*p));
+
+ p->domain = dom_id;
+ p->processor = cpu;
+
+ spin_lock_init(&p->blk_ring_lock);
+
+ p->shared_info = (void *)get_free_page(GFP_KERNEL);
+ memset(p->shared_info, 0, PAGE_SIZE);
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), dom_id);
+
+ init_blkdev_info(p);
+
+ SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
+ SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
+
+ p->addr_limit = USER_DS;
+ p->state = TASK_UNINTERRUPTIBLE;
+ p->active_mm = &p->mm;
+ p->num_net_vifs = 0;
+
+ p->net_ring_base = (net_ring_t *)(p->shared_info + 1);
+ INIT_LIST_HEAD(&p->pg_head);
+ p->tot_pages = 0;
+ write_lock_irqsave(&tasklist_lock, flags);
+ SET_LINKS(p);
+ write_unlock_irqrestore(&tasklist_lock, flags);
+
+ newdomain_out:
+ return(p);
+}
+
+/* Get a pointer to the specified domain. Consider replacing this
+ * with a hash lookup later.
+ *
+ * Also, kill_other_domain should call this instead of scanning on its own.
+ */
+struct task_struct *find_domain_by_id(unsigned int dom)
+{
+ struct task_struct *p = &idle0_task;
+
+ read_lock_irq(&tasklist_lock);
+ do {
+ if ( (p->domain == dom) ) {
+ read_unlock_irq(&tasklist_lock);
+ return (p);
+ }
+ } while ( (p = p->next_task) != &idle0_task );
+ read_unlock_irq(&tasklist_lock);
+
+ return 0;
+}
+
+
+void kill_domain_with_errmsg(const char *err)
+{
+ printk("DOM%d FATAL ERROR: %s\n",
+ current->domain, err);
+ kill_domain();
+}
+
+
+/* Kill the currently executing domain. */
+void kill_domain(void)
+{
+ if ( current->domain == 0 )
+ {
+ extern void machine_restart(char *);
+ printk("Domain 0 killed: rebooting machine!\n");
+ machine_restart(0);
+ }
+
+ printk("Killing domain %d\n", current->domain);
+ current->state = TASK_DYING;
+ schedule();
+ BUG(); /* never get here */
+}
+
+
+long kill_other_domain(unsigned int dom)
+{
+ struct task_struct *p = &idle0_task;
+ unsigned long cpu_mask = 0;
+ long ret = -ESRCH;
+
+ read_lock_irq(&tasklist_lock);
+ do {
+ if ( p->domain == dom )
+ {
+ cpu_mask = mark_guest_event(p, _EVENT_DIE);
+ ret = 0;
+ break;
+ }
+ }
+ while ( (p = p->next_task) != &idle0_task );
+ read_unlock_irq(&tasklist_lock);
+
+ hyp_event_notify(cpu_mask);
+
+ return ret;
+}
+
+
+unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
+{
+ struct list_head *temp;
+ struct pfn_info *pf;
+ unsigned int alloc_pfns;
+ unsigned int req_pages;
+ unsigned long flags;
+
+ /* how many pages do we need to alloc? */
+ req_pages = kbytes >> (PAGE_SHIFT - 10);
+
+ spin_lock_irqsave(&free_list_lock, flags);
+
+ /* is there enough mem to serve the request? */
+ if ( req_pages > free_pfns )
+ {
+ spin_unlock_irqrestore(&free_list_lock, flags);
+ return -1;
+ }
+
+ /* allocate pages and build a thread through frame_table */
+ temp = free_list.next;
+ for ( alloc_pfns = 0; alloc_pfns < req_pages; alloc_pfns++ )
+ {
+ pf = list_entry(temp, struct pfn_info, list);
+ pf->flags |= p->domain;
+ pf->type_count = pf->tot_count = 0;
+ temp = temp->next;
+ list_del(&pf->list);
+ list_add_tail(&pf->list, &p->pg_head);
+ free_pfns--;
+ }
+
+ spin_unlock_irqrestore(&free_list_lock, flags);
+
+ p->tot_pages = req_pages;
+
+ return 0;
+}
+
+
+void free_all_dom_mem(struct task_struct *p)
+{
+ struct list_head *list_ent, *tmp;
+
+ list_for_each_safe(list_ent, tmp, &p->pg_head)
+ {
+ struct pfn_info *pf = list_entry(list_ent, struct pfn_info, list);
+ pf->type_count = pf->tot_count = pf->flags = 0;
+ list_del(list_ent);
+ list_add(list_ent, &free_list);
+ }
+
+ p->tot_pages = 0;
+}
+
+
+/* Release resources belonging to task @p. */
+void release_task(struct task_struct *p)
+{
+ ASSERT(p->state == TASK_DYING);
+ ASSERT(!p->has_cpu);
+ write_lock_irq(&tasklist_lock);
+ REMOVE_LINKS(p);
+ write_unlock_irq(&tasklist_lock);
+
+ /*
+ * Safe! Only queue skbuffs with tasklist_lock held.
+ * Only access shared_info with tasklist_lock held.
+ * And free_task_struct() only releases if refcnt == 0.
+ */
+ while ( p->num_net_vifs )
+ {
+ destroy_net_vif(p);
+ }
+ if ( p->mm.perdomain_pt ) free_page((unsigned long)p->mm.perdomain_pt);
+
+ destroy_blkdev_info(p);
+
+ UNSHARE_PFN(virt_to_page(p->shared_info));
+ free_page((unsigned long)p->shared_info);
+
+ free_all_dom_mem(p);
+
+ free_task_struct(p);
+}
+
+
+/* final_setup_guestos is used for the final setup and launching of domains
+ * other than domain 0, i.e. the domains built by the userspace domain
+ * builder running in domain 0.
+ *
+ * Initial load map:
+ * start_address:
+ * OS image
+ * ....
+ * stack_start:
+ * start_info:
+ * <one page>
+ * page tables:
+ * <enough pages>
+ * end_address:
+ * shared_info:
+ * <one page>
+ */
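+
+/* Worked example (illustrative, assuming 4KB pages): a 16MB domain loaded
+ * at 0xC0000000 has nr_pages = 4096, so the code below computes
+ * pt_base = virt_load_addr + (4096 - 1) * PAGE_SIZE = 0xC0FFF000; the page
+ * tables thus sit at the very top of the domain's allocation, with the
+ * shared_info page mapped just beyond it. */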
+
+int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo)
+{
+ l2_pgentry_t * l2tab;
+ l1_pgentry_t * l1tab;
+ start_info_t * virt_startinfo_addr;
+ unsigned long virt_stack_addr;
+ unsigned long phys_l2tab;
+ net_ring_t *net_ring;
+ net_vif_t *net_vif;
+
+ /* Entries from 0xe0000000 upwards in the page table must contain the
+ * hypervisor's memory mappings -- set them up here.
+ */
+ phys_l2tab = meminfo->l2_pgt_addr;
+ l2tab = map_domain_mem(phys_l2tab);
+ memcpy(l2tab + DOMAIN_ENTRIES_PER_L2_PAGETABLE,
+ ((l2_pgentry_t *)idle_pg_table[p->processor]) +
+ DOMAIN_ENTRIES_PER_L2_PAGETABLE,
+ (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
+ * sizeof(l2_pgentry_t));
+ l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(p->mm.perdomain_pt) | PAGE_HYPERVISOR);
+ p->mm.pagetable = mk_pagetable(phys_l2tab);
+ unmap_domain_mem(l2tab);
+
+ /* map in the shared info structure */
+ phys_l2tab = pagetable_val(p->mm.pagetable);
+ l2tab = map_domain_mem(phys_l2tab);
+ l2tab += l2_table_offset(meminfo->virt_shinfo_addr);
+ l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
+ l1tab += l1_table_offset(meminfo->virt_shinfo_addr);
+ *l1tab = mk_l1_pgentry(__pa(p->shared_info) | L1_PROT);
+ unmap_domain_mem((void *)((unsigned long)l2tab & PAGE_MASK));
+ unmap_domain_mem((void *)((unsigned long)l1tab & PAGE_MASK));
+
+ /* set up the shared info structure */
+ update_dom_time(p->shared_info);
+ p->shared_info->cpu_freq = cpu_freq;
+ p->shared_info->domain_time = 0;
+
+ /* we pass start info struct to guest os as function parameter on stack */
+ virt_startinfo_addr = (start_info_t *)meminfo->virt_startinfo_addr;
+ virt_stack_addr = (unsigned long)virt_startinfo_addr;
+
+ /* We need to populate the start_info struct within the context of the
+ * new domain, so temporarily install its page tables.
+ */
+ __cli();
+ __asm__ __volatile__ (
+ "mov %%eax,%%cr3" : : "a" (pagetable_val(p->mm.pagetable)));
+
+ memset(virt_startinfo_addr, 0, sizeof(*virt_startinfo_addr));
+ virt_startinfo_addr->nr_pages = p->tot_pages;
+ virt_startinfo_addr->shared_info = (shared_info_t *)meminfo->virt_shinfo_addr;
+ virt_startinfo_addr->pt_base = meminfo->virt_load_addr +
+ ((p->tot_pages - 1) << PAGE_SHIFT);
+
+ /* Add virtual network interfaces and point to them in startinfo. */
+ while (meminfo->num_vifs-- > 0) {
+ net_vif = create_net_vif(p->domain);
+ net_ring = net_vif->net_ring;
+ if (!net_ring) panic("no network ring!\n");
+ }
+
+/* XXX SMH: horrible hack to convert hypervisor VAs in the shared-info page
+ * (SHIP) to guest VAs */
+#define SH2G(_x) (meminfo->virt_shinfo_addr | (((unsigned long)(_x)) & 0xFFF))
+
+ virt_startinfo_addr->net_rings = (net_ring_t *)SH2G(p->net_ring_base);
+ virt_startinfo_addr->num_net_rings = p->num_net_vifs;
+
+ /* Add block io interface */
+ virt_startinfo_addr->blk_ring = virt_to_phys(p->blk_ring_base);
+
+ /* Copy the command line */
+ strcpy(virt_startinfo_addr->cmd_line, meminfo->cmd_line);
+
+ /* Reinstate the caller's page tables. */
+ __asm__ __volatile__ (
+ "mov %%eax,%%cr3" : : "a" (pagetable_val(current->mm.pagetable)));
+ __sti();
+
+ new_thread(p,
+ (unsigned long)meminfo->virt_load_addr,
+ (unsigned long)virt_stack_addr,
+ (unsigned long)virt_startinfo_addr);
+
+ return 0;
+}
+
+/* Allocate the page currently addressed by *cur_addr from a domain's page
+ * list, stepping *cur_addr back to the previous page in the list and
+ * decrementing *index. Returns the machine address of the allocated page. */
+static unsigned long alloc_page_from_domain(unsigned long * cur_addr,
+ unsigned long * index)
+{
+ unsigned long ret = *cur_addr;
+ struct list_head *ent = frame_table[ret >> PAGE_SHIFT].list.prev;
+ *cur_addr = list_entry(ent, struct pfn_info, list) - frame_table;
+ *cur_addr <<= PAGE_SHIFT;
+ (*index)--;
+ return ret;
+}
+
+/* setup_guestos is used solely for building domain 0. Other domains are
+ * built by the userspace domain builder in domain 0, with final setup done
+ * by final_setup_guestos.
+ */
+int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
+{
+ struct list_head *list_ent;
+ char *src, *dst;
+ int i, dom = p->domain;
+ unsigned long phys_l1tab, phys_l2tab;
+ unsigned long cur_address, alloc_address;
+ unsigned long virt_load_address, virt_stack_address, virt_shinfo_address;
+ start_info_t *virt_startinfo_address;
+ unsigned long count;
+ unsigned long alloc_index;
+ l2_pgentry_t *l2tab, *l2start;
+ l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+ struct pfn_info *page = NULL;
+ net_ring_t *net_ring;
+ net_vif_t *net_vif;
+
+ /* Sanity! */
+ if ( p->domain != 0 ) BUG();
+
+ if ( strncmp(__va(mod[0].mod_start), "XenoGues", 8) )
+ {
+ printk("DOM%d: Invalid guest OS image\n", dom);
+ return -1;
+ }
+
+ virt_load_address = *(unsigned long *)__va(mod[0].mod_start + 8);
+ if ( (virt_load_address & (PAGE_SIZE-1)) )
+ {
+ printk("DOM%d: Guest OS load address not page-aligned (%08lx)\n",
+ dom, virt_load_address);
+ return -1;
+ }
+
+ if ( alloc_new_dom_mem(p, params->memory_kb) ) return -ENOMEM;
+ alloc_address = list_entry(p->pg_head.prev, struct pfn_info, list) -
+ frame_table;
+ alloc_address <<= PAGE_SHIFT;
+ alloc_index = p->tot_pages;
+
+ if ( (mod[nr_mods-1].mod_end-mod[0].mod_start) >
+ (params->memory_kb << 9) )
+ {
+ printk("DOM%d: Guest OS image is too large\n"
+ " (%luMB is greater than %uMB limit for a\n"
+ " %uMB address space)\n",
+ dom, (mod[nr_mods-1].mod_end-mod[0].mod_start)>>20,
+ (params->memory_kb)>>11,
+ (params->memory_kb)>>10);
+ free_all_dom_mem(p);
+ return -1;
+ }
+
+ printk("DOM%d: Guest OS virtual load address is %08lx\n", dom,
+ virt_load_address);
+
+ /*
+ * WARNING: The new domain must have its 'processor' field
+ * filled in by now !!
+ */
+ phys_l2tab = alloc_page_from_domain(&alloc_address, &alloc_index);
+ l2start = l2tab = map_domain_mem(phys_l2tab);
+ memcpy(l2tab, idle_pg_table[p->processor], PAGE_SIZE);
+ l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR);
+ memset(l2tab, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
+ p->mm.pagetable = mk_pagetable(phys_l2tab);
+
+ /*
+ * NB. The upper limit on this loop does one extra page. This is to make
+ * sure a pte exists when we want to map the shared_info struct.
+ */
+
+ l2tab += l2_table_offset(virt_load_address);
+ cur_address = list_entry(p->pg_head.next, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ for ( count = 0; count < p->tot_pages + 1; count++ )
+ {
+ if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+ {
+ if ( l1tab != NULL ) unmap_domain_mem(l1start);
+ phys_l1tab = alloc_page_from_domain(&alloc_address, &alloc_index);
+ *l2tab++ = mk_l2_pgentry(phys_l1tab|L2_PROT);
+ l1start = l1tab = map_domain_mem(phys_l1tab);
+ clear_page(l1tab);
+ l1tab += l1_table_offset(
+ virt_load_address + (count << PAGE_SHIFT));
+ }
+ *l1tab++ = mk_l1_pgentry(cur_address|L1_PROT);
+
+ if ( count < p->tot_pages )
+ {
+ page = frame_table + (cur_address >> PAGE_SHIFT);
+ page->flags = dom | PGT_writeable_page;
+ page->type_count = page->tot_count = 1;
+ /* Set up the MPT entry. */
+ machine_to_phys_mapping[cur_address >> PAGE_SHIFT] = count;
+ }
+
+ list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
+ cur_address = list_entry(list_ent, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ }
+ unmap_domain_mem(l1start);
+
+ /* Pages holding the page tables must be mapped read-only in the domain.
+ * The table pages were allocated from the tail of the page list, so first
+ * advance cur_address past the leading OS-image pages. */
+ cur_address = list_entry(p->pg_head.next, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ for ( count = 0; count < alloc_index; count++ )
+ {
+ list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
+ cur_address = list_entry(list_ent, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ }
+
+ l2tab = l2start + l2_table_offset(virt_load_address +
+ (alloc_index << PAGE_SHIFT));
+ l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
+ l1tab += l1_table_offset(virt_load_address + (alloc_index << PAGE_SHIFT));
+ l2tab++;
+ for ( count = alloc_index; count < p->tot_pages; count++ )
+ {
+ /* Read-modify-write split into two statements to avoid an unsequenced
+ * access to *l1tab. */
+ *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+ l1tab++;
+ if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
+ {
+ unmap_domain_mem(l1start);
+ l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
+ l2tab++;
+ }
+ page = frame_table + (cur_address >> PAGE_SHIFT);
+ page->flags = dom | PGT_l1_page_table;
+ page->tot_count++;
+
+ list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
+ cur_address = list_entry(list_ent, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ }
+ page->type_count |= REFCNT_PIN_BIT;
+ page->tot_count |= REFCNT_PIN_BIT;
+ page->flags = dom | PGT_l2_page_table;
+ unmap_domain_mem(l1start);
+
+ /* Map in the shared info structure. */
+ virt_shinfo_address = virt_load_address + (p->tot_pages << PAGE_SHIFT);
+ l2tab = l2start + l2_table_offset(virt_shinfo_address);
+ l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
+ l1tab += l1_table_offset(virt_shinfo_address);
+ *l1tab = mk_l1_pgentry(__pa(p->shared_info)|L1_PROT);
+ unmap_domain_mem(l1start);
+
+ /* Set up shared info area. */
+ update_dom_time(p->shared_info);
+ p->shared_info->cpu_freq = cpu_freq;
+ p->shared_info->domain_time = 0;
+
+ virt_startinfo_address = (start_info_t *)
+ (virt_load_address + ((alloc_index - 1) << PAGE_SHIFT));
+ virt_stack_address = (unsigned long)virt_startinfo_address;
+
+ unmap_domain_mem(l2start);
+
+ /* Install the new page tables. */
+ __cli();
+ __write_cr3_counted(pagetable_val(p->mm.pagetable));
+
+ /* Copy the guest OS image, skipping the 8-byte "XenoGues" magic and the
+ * 4-byte load-address header. */
+ src = (char *)__va(mod[0].mod_start + 12);
+ dst = (char *)virt_load_address;
+ while ( src < (char *)__va(mod[nr_mods-1].mod_end) ) *dst++ = *src++;
+
+ /* Set up start info area. */
+ memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address));
+ virt_startinfo_address->nr_pages = p->tot_pages;
+ virt_startinfo_address->shared_info =
+ (shared_info_t *)virt_shinfo_address;
+ virt_startinfo_address->pt_base = virt_load_address +
+ ((p->tot_pages - 1) << PAGE_SHIFT);
+
+ /* Add virtual network interfaces and point to them in startinfo. */
+ while (params->num_vifs-- > 0) {
+ net_vif = create_net_vif(dom);
+ net_ring = net_vif->net_ring;
+ if (!net_ring) panic("no network ring!\n");
+ }
+
+/* XXX SMH: horrible hack to convert hypervisor VAs in the shared-info page
+ * (SHIP) to guest VAs */
+#define SHIP2GUEST(_x) (virt_shinfo_address | (((unsigned long)(_x)) & 0xFFF))
+
+ virt_startinfo_address->net_rings =
+ (net_ring_t *)SHIP2GUEST(p->net_ring_base);
+ virt_startinfo_address->num_net_rings = p->num_net_vifs;
+
+ /* Add block io interface */
+ virt_startinfo_address->blk_ring = virt_to_phys(p->blk_ring_base);
+
+ /* Tell the guest OS about any modules we were given. */
+ if ( nr_mods > 1 )
+ {
+ virt_startinfo_address->mod_start =
+ (mod[1].mod_start-mod[0].mod_start-12) + virt_load_address;
+ virt_startinfo_address->mod_len =
+ mod[nr_mods-1].mod_end - mod[1].mod_start;
+ }
+
+ dst = virt_startinfo_address->cmd_line;
+ if ( mod[0].string )
+ {
+ char *modline = (char *)__va(mod[0].string);
+ for ( i = 0; i < 255; i++ )
+ {
+ if ( modline[i] == '\0' ) break;
+ *dst++ = modline[i];
+ }
+ }
+ *dst = '\0';
+
+ /* Reinstate the caller's page tables. */
+ __write_cr3_counted(pagetable_val(current->mm.pagetable));
+ __sti();
+
+ new_thread(p,
+ (unsigned long)virt_load_address,
+ (unsigned long)virt_stack_address,
+ (unsigned long)virt_startinfo_address);
+
+ return 0;
+}
+
+
+void __init domain_init(void)
+{
+ printk("Initialising domains\n");
+}
+
+
diff --git a/xen/common/domain_page.c b/xen/common/domain_page.c
new file mode 100644
index 0000000000..927ba63ff1
--- /dev/null
+++ b/xen/common/domain_page.c
@@ -0,0 +1,67 @@
+/******************************************************************************
+ * domain_page.c
+ *
+ * Allow temporary mapping of domain pages. Based on ideas from the
+ * Linux PKMAP code -- the copyrights and credits are retained below.
+ */
+
+/*
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <xeno/mm.h>
+#include <asm/domain_page.h>
+#include <asm/pgalloc.h>
+
+static unsigned int map_idx[NR_CPUS];
+
+/* Use a spare PTE bit to mark entries ready for recycling. */
+#define READY_FOR_TLB_FLUSH (1<<10)
+
+static void flush_all_ready_maps(void)
+{
+ unsigned long *cache = mapcache[smp_processor_id()];
+
+ /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */
+ do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; }
+ while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
+
+ local_flush_tlb();
+}
+
+
+void *map_domain_mem(unsigned long pa)
+{
+ unsigned long va;
+ int cpu = smp_processor_id();
+ unsigned int idx;
+ unsigned long *cache = mapcache[cpu];
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ for ( ; ; )
+ {
+ idx = map_idx[cpu] = (map_idx[cpu] + 1) & (MAPCACHE_ENTRIES - 1);
+ if ( idx == 0 ) flush_all_ready_maps();
+ if ( cache[idx] == 0 ) break;
+ }
+
+ cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR;
+
+ local_irq_restore(flags);
+
+ va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK);
+ return (void *)va;
+}
+
+void unmap_domain_mem(void *va)
+{
+ unsigned int idx;
+ idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
+ mapcache[smp_processor_id()][idx] |= READY_FOR_TLB_FLUSH;
+}
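+
+/*
+ * Usage sketch (illustrative only; zero_machine_page() is hypothetical and
+ * not part of this interface). A mapping returned by map_domain_mem() is
+ * short-lived and must be released with unmap_domain_mem():
+ *
+ *   static void zero_machine_page(unsigned long pa)
+ *   {
+ *       u8 *va = map_domain_mem(pa & PAGE_MASK);
+ *       memset(va, 0, PAGE_SIZE);
+ *       unmap_domain_mem(va);
+ *   }
+ */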
diff --git a/xen/common/event.c b/xen/common/event.c
new file mode 100644
index 0000000000..2774806443
--- /dev/null
+++ b/xen/common/event.c
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * event.c
+ *
+ * A nice interface for passing per-domain asynchronous events.
+ * These events are handled in the hypervisor, prior to return
+ * to the guest OS.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/event.h>
+
+typedef void (*hyp_event_callback_fn_t)(void);
+
+extern void schedule(void);
+extern void update_shared_ring(void);
+
+/* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
+static hyp_event_callback_fn_t event_call_fn[] =
+{
+ schedule,
+ update_shared_ring,
+ kill_domain,
+};
+
+/* Handle outstanding events for the currently-executing domain. */
+void do_hyp_events(void)
+{
+ int nr;
+ while ( (nr = ffs(current->hyp_events)) != 0 )
+ (event_call_fn[nr-1])();
+}
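+
+/* Example (illustrative): ffs() returns the 1-based index of the lowest set
+ * bit, so setting bit 2 of current->hyp_events makes the loop above invoke
+ * event_call_fn[2], i.e. kill_domain(), before the hypervisor returns to
+ * the guest. */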
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
new file mode 100644
index 0000000000..09aae6fc70
--- /dev/null
+++ b/xen/common/kernel.c
@@ -0,0 +1,519 @@
+#include <stdarg.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/multiboot.h>
+#include <xeno/spinlock.h>
+#include <xeno/sched.h>
+#include <xeno/mm.h>
+#include <xeno/delay.h>
+#include <xeno/skbuff.h>
+#include <xeno/interrupt.h>
+#include <asm/io.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <xeno/dom0_ops.h>
+#include <asm/byteorder.h>
+#include <linux/if_ether.h>
+#include <asm/domain_page.h>
+
+/* VGA text definitions. */
+#define COLUMNS 80
+#define LINES 24
+#define ATTRIBUTE 7
+#define VIDEO __va(0xB8000)
+
+static int xpos, ypos;
+static volatile unsigned char *video;
+
+spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
+
+struct e820entry {
+ unsigned long addr_lo, addr_hi; /* start of memory segment */
+ unsigned long size_lo, size_hi; /* size of memory segment */
+ unsigned long type; /* type of memory segment */
+};
+
+/* Used by domain.c:setup_guestos */
+int nr_mods;
+module_t *mod;
+
+void init_serial(void);
+void start_of_day(void);
+
+/* Command line options and variables. */
+unsigned long opt_dom0_ip = 0;
+unsigned int opt_dom0_mem = 16000; /* default kbytes for DOM0 */
+unsigned int opt_ne_base = 0; /* NE2k NICs cannot be probed */
+unsigned char opt_ifname[10] = "eth0";
+int opt_noht=0, opt_noacpi=0;
+enum { OPT_IP, OPT_STR, OPT_UINT, OPT_BOOL };
+static struct {
+ unsigned char *name;
+ int type;
+ void *var;
+} opts[] = {
+ { "dom0_ip", OPT_IP, &opt_dom0_ip },
+ { "dom0_mem", OPT_UINT, &opt_dom0_mem },
+ { "ne_base", OPT_UINT, &opt_ne_base },
+ { "ifname", OPT_STR, &opt_ifname },
+ { "noht", OPT_BOOL, &opt_noht },
+ { "noacpi", OPT_BOOL, &opt_noacpi },
+ { NULL, 0, NULL }
+};
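+
+/* Example (illustrative): booting with the command line
+ *   "xen dom0_ip=169.254.1.1 dom0_mem=32768 noacpi"
+ * leaves opt_dom0_ip = 0xa9fe0101 (via str_to_quad), opt_dom0_mem = 32768
+ * and opt_noacpi = 1; unrecognised options are silently ignored. */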
+
+void cmain (unsigned long magic, multiboot_info_t *mbi)
+{
+ struct task_struct *new_dom;
+ dom0_newdomain_t dom0_params;
+ unsigned long max_page;
+ unsigned char *cmdline;
+ int i;
+
+ init_serial();
+ cls();
+
+ if ( magic != MULTIBOOT_BOOTLOADER_MAGIC )
+ {
+ printf("Invalid magic number: 0x%x\n", (unsigned)magic);
+ return;
+ }
+
+ /*
+ * We require some kind of memory and module information.
+ * The rest we can fake!
+ */
+ if ( (mbi->flags & 9) != 9 )
+ {
+ printf("Bad flags passed by bootloader: 0x%x\n", (unsigned)mbi->flags);
+ return;
+ }
+
+ if ( mbi->mods_count == 0 )
+ {
+ printf("Require at least one module!\n");
+ return;
+ }
+
+ /* Are mmap_* valid? */
+#if 0
+ if ( (mbi->flags & (1<<6)) )
+ {
+ memory_map_t *mmap = (memory_map_t *)mbi->mmap_addr;
+ struct e820entry *e820 = E820_MAP;
+
+ while ( (unsigned long)mmap < (mbi->mmap_addr + mbi->mmap_length) )
+ {
+ e820->addr_lo = mmap->base_addr_low;
+ e820->addr_hi = mmap->base_addr_high;
+ e820->size_lo = mmap->length_low;
+ e820->size_hi = mmap->length_high;
+ e820->type = mmap->type;
+ e820++;
+ mmap = (memory_map_t *)
+ ((unsigned long)mmap + mmap->size + sizeof (mmap->size));
+ }
+ }
+#endif
+
+ nr_mods = mbi->mods_count;
+ mod = (module_t *)__va(mbi->mods_addr);
+
+ /* Parse the command line. */
+ cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL);
+ if ( cmdline != NULL )
+ {
+ unsigned char *opt_end, *opt;
+ while ( *cmdline == ' ' ) cmdline++;
+ cmdline = strchr(cmdline, ' ');
+ while ( cmdline != NULL )
+ {
+ while ( *cmdline == ' ' ) cmdline++;
+ if ( *cmdline == '\0' ) break;
+ opt_end = strchr(cmdline, ' ');
+ if ( opt_end != NULL ) *opt_end++ = '\0';
+ opt = strchr(cmdline, '=');
+ if ( opt != NULL ) *opt++ = '\0';
+ for ( i = 0; opts[i].name != NULL; i++ )
+ {
+ if ( strcmp(opts[i].name, cmdline ) != 0 ) continue;
+ switch ( opts[i].type )
+ {
+ case OPT_IP:
+ if ( opt != NULL )
+ *(unsigned long *)opts[i].var = str_to_quad(opt);
+ break;
+ case OPT_STR:
+ if ( opt != NULL )
+ strcpy(opts[i].var, opt);
+ break;
+ case OPT_UINT:
+ if ( opt != NULL )
+ *(unsigned int *)opts[i].var =
+ simple_strtol(opt, (char **)&opt, 0);
+ break;
+ case OPT_BOOL:
+ *(int *)opts[i].var = 1;
+ break;
+ }
+ }
+ cmdline = opt_end;
+ }
+ }
+
+ memcpy(&idle0_task_union, &first_task_struct, sizeof(first_task_struct));
+
+ max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
+ init_frametable(max_page);
+ printk("Initialised all memory on a %luMB machine\n",
+ max_page >> (20-PAGE_SHIFT));
+
+ init_page_allocator(mod[nr_mods-1].mod_end, MAX_MONITOR_ADDRESS);
+
+ /* These things will get done by do_newdomain() for all other tasks. */
+ current->shared_info = (void *)get_free_page(GFP_KERNEL);
+ memset(current->shared_info, 0, sizeof(shared_info_t));
+ set_fs(USER_DS);
+ current->num_net_vifs = 0;
+
+ start_of_day();
+
+ /* Create initial domain 0. */
+ dom0_params.num_vifs = 1;
+ dom0_params.memory_kb = opt_dom0_mem;
+
+ if ( opt_dom0_ip == 0 )
+ panic("Must specify an IP address for domain 0!\n");
+
+ add_default_net_rule(0, opt_dom0_ip); /* add VFR info for dom0 */
+
+ new_dom = do_newdomain(0, 0);
+ if ( new_dom == NULL ) panic("Error creating domain 0\n");
+ if ( setup_guestos(new_dom, &dom0_params) != 0 )
+ {
+ panic("Could not set up DOM0 guest OS\n");
+ }
+ update_dom_time(new_dom->shared_info);
+ wake_up(new_dom);
+
+ cpu_idle();
+}
+
+
+#define SERIAL_BASE 0x3f8
+#define RX_BUF 0
+#define TX_HOLD 0
+#define INT_ENABLE 1
+#define INT_IDENT 2
+#define DATA_FORMAT 3
+#define LINE_CTL 4
+#define LINE_STATUS 5
+#define LINE_IN 6
+#define DIVISOR_LO 0
+#define DIVISOR_HI 1
+
+void init_serial(void)
+{
+ /* 9600 baud, no parity, 1 stop bit, 8 data bits. */
+ outb(0x83, SERIAL_BASE+DATA_FORMAT); /* set DLAB to program the divisor */
+ outb(12, SERIAL_BASE+DIVISOR_LO); /* divisor 12: 115200/12 = 9600 baud */
+ outb(0, SERIAL_BASE+DIVISOR_HI);
+ outb(0x03, SERIAL_BASE+DATA_FORMAT); /* clear DLAB; 8n1 */
+
+ /* No interrupts. */
+ outb(0x00, SERIAL_BASE+INT_ENABLE);
+}
+
+
+void putchar_serial(unsigned char c)
+{
+ if ( c == '\n' ) putchar_serial('\r');
+ if ( (c != '\n') && (c != '\r') && ((c < 32) || (c > 126)) ) return;
+ while ( !(inb(SERIAL_BASE+LINE_STATUS)&(1<<5)) ) barrier();
+ outb(c, SERIAL_BASE+TX_HOLD);
+}
+
+
+/* Clear the screen and initialize VIDEO, XPOS and YPOS. */
+void cls (void)
+{
+ int i;
+
+ video = (unsigned char *) VIDEO;
+
+ for (i = 0; i < COLUMNS * LINES * 2; i++)
+ *(video + i) = 0;
+
+ xpos = 0;
+ ypos = 0;
+
+ outw(10+(1<<(5+8)), 0x3d4); /* cursor off */
+}
+
+
+/* Put the character C on the screen. */
+static void putchar (int c)
+{
+ static char zeroarr[2*COLUMNS] = { 0 };
+
+ putchar_serial(c);
+
+ if (c == '\n' || c == '\r')
+ {
+ newline:
+ xpos = 0;
+ ypos++;
+ if (ypos >= LINES)
+ {
+ ypos = LINES-1;
+ memcpy((char*)video,
+ (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS);
+ memcpy((char*)video + (LINES-1)*2*COLUMNS,
+ zeroarr, 2*COLUMNS);
+ }
+ return;
+ }
+
+ *(video + (xpos + ypos * COLUMNS) * 2) = c & 0xFF;
+ *(video + (xpos + ypos * COLUMNS) * 2 + 1) = ATTRIBUTE;
+
+ xpos++;
+ if (xpos >= COLUMNS)
+ goto newline;
+}
+
+static inline void __putstr(const char *str)
+{
+ while ( *str ) putchar(*str++);
+}
+
+void printf (const char *fmt, ...)
+{
+ va_list args;
+ char buf[1024], *p;
+ unsigned long flags;
+
+ va_start(args, fmt);
+ (void)vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ p = buf;
+ spin_lock_irqsave(&console_lock, flags);
+ while ( *p ) putchar(*p++);
+ spin_unlock_irqrestore(&console_lock, flags);
+}
+
+void panic(const char *fmt, ...)
+{
+ va_list args;
+ char buf[1024], *p;
+ unsigned long flags;
+ extern void machine_restart(char *);
+
+ va_start(args, fmt);
+ (void)vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ /* Spit out multiline message in one go. */
+ spin_lock_irqsave(&console_lock, flags);
+ __putstr("\n****************************************\n");
+ p = buf;
+ while ( *p ) putchar(*p++);
+ __putstr("Aieee! CPU");
+ putchar((char)smp_processor_id() + '0');
+ __putstr(" is toast...\n");
+ __putstr("****************************************\n\n");
+ __putstr("Reboot in five seconds...\n");
+ spin_unlock_irqrestore(&console_lock, flags);
+
+ mdelay(5000);
+ machine_restart(0);
+}
+
+/* No-op syscall. */
+asmlinkage long sys_ni_syscall(void)
+{
+ return -ENOSYS;
+}
+
+
+unsigned short compute_cksum(unsigned short *buf, int count)
+{
+ /* Function written by ek247. Computes the IP and UDP checksums for the
+ * fake console packets created in console_export. */
+
+ unsigned long sum=0;
+
+ while (count--)
+ {
+ sum+=*buf++;
+ if (sum & 0xFFFF0000)
+ {
+ /* Carry occurred, so wrap around. */
+ sum &=0xFFFF;
+ sum++;
+ }
+ }
+ return ~(sum & 0xFFFF);
+}
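+
+/* Note: this is the standard ones-complement Internet checksum (RFC 1071),
+ * with the end-around carry folded after each addition rather than once at
+ * the end -- the two orders give the same result. For example, summing
+ * 0xFFFF and 0x0002 overflows to 0x10001, wraps to 0x0002, and the final
+ * complement yields 0xFFFD. */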
+
+
+
+/* XXX SMH: below is rather vile; pulled in to allow network console */
+
+extern int netif_rx(struct sk_buff *);
+extern struct net_device *the_dev;
+
+typedef struct my_udphdr {
+ __u16 source;
+ __u16 dest;
+ __u16 len;
+ __u16 check;
+} my_udphdr_t;
+
+
+typedef struct my_iphdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 ihl:4,
+ version:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u8 version:4,
+ ihl:4;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 tos;
+ __u16 tot_len;
+ __u16 id;
+ __u16 frag_off;
+ __u8 ttl;
+ __u8 protocol;
+ __u16 check;
+ __u32 saddr;
+ __u32 daddr;
+} my_iphdr_t;
+
+
+typedef struct my_ethhdr {
+ unsigned char h_dest[6];
+ unsigned char h_source[6];
+ unsigned short h_proto;
+} my_ethhdr_t;
+
+/*
+ * Function written by ek247. Exports console output from all domains upwards
+ * to domain0, by stuffing it into a fake network packet.
+ */
+int console_export(char *str, int len)
+{
+ struct sk_buff *skb;
+ struct my_iphdr *iph = NULL;
+ struct my_udphdr *udph = NULL;
+ struct my_ethhdr *ethh = NULL;
+ int hdr_size = sizeof(struct my_iphdr) + sizeof(struct my_udphdr);
+ u8 *skb_data;
+
+ skb = dev_alloc_skb(sizeof(struct my_ethhdr) +
+ hdr_size + len + 20);
+ if ( skb == NULL ) return 0;
+
+ skb->dev = the_dev;
+ skb_data = (u8 *)map_domain_mem((skb->pf - frame_table) << PAGE_SHIFT);
+ skb_reserve(skb, 2);
+
+ /* Get a pointer to each header. */
+ ethh = (struct my_ethhdr *)
+ (skb_data + (skb->data - skb->head));
+ iph = (struct my_iphdr *)(ethh + 1);
+ udph = (struct my_udphdr *)(iph + 1);
+
+ skb_reserve(skb, sizeof(struct my_ethhdr));
+ skb_put(skb, hdr_size + len);
+
+ /* Build IP header. */
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->frag_off= 0;
+ iph->id = 0xdead;
+ iph->ttl = 255;
+ iph->protocol= 17;
+ iph->daddr = htonl(opt_dom0_ip);
+ iph->saddr = htonl(0xa9fe0001);
+ iph->tot_len = htons(hdr_size + len);
+ iph->check = 0;
+ iph->check = compute_cksum((__u16 *)iph, sizeof(struct my_iphdr)/2);
+
+ /* Build UDP header. */
+ udph->source = htons(current->domain);
+ udph->dest = htons(666);
+ udph->len = htons(sizeof(struct my_udphdr) + len);
+ udph->check = 0;
+
+ /* Build the UDP payload. */
+ memcpy((char *)(udph + 1), str, len);
+
+ /* Fix Ethernet header. */
+ memset(ethh->h_source, 0, ETH_ALEN);
+ memset(ethh->h_dest, 0, ETH_ALEN);
+ ethh->h_proto = htons(ETH_P_IP);
+ skb->mac.ethernet= (struct ethhdr *)ethh;
+
+ /* Keep the net rule tables happy. */
+ skb->src_vif = VIF_PHYSICAL_INTERFACE;
+ skb->dst_vif = 0;
+
+ unmap_domain_mem(skb_data);
+
+ (void)netif_rx(skb);
+
+ return 1;
+}
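+
+/* Example (illustrative): console output from domain 3 thus arrives at
+ * domain 0 as a UDP datagram from 169.254.0.1 (source port 3, i.e. the
+ * domain id) to opt_dom0_ip port 666, so a simple listener on domain 0 can
+ * demultiplex the consoles of all domains by source port. */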
+
+
+long do_console_write(char *str, int count)
+{
+#define SIZEOF_BUF 256
+ unsigned char safe_str[SIZEOF_BUF];
+ unsigned char exported_str[SIZEOF_BUF];
+ unsigned long flags;
+ int i=0;
+ int j=0;
+ unsigned char prev = '\n';
+
+ /* Leave room for the terminating NUL written below. */
+ if ( count > (SIZEOF_BUF - 1) ) count = SIZEOF_BUF - 1;
+
+ if ( copy_from_user(safe_str, str, count) )
+ return -EFAULT;
+
+ spin_lock_irqsave(&console_lock, flags);
+
+ __putstr("DOM");
+ putchar(current->domain+'0');
+ __putstr(": ");
+
+ for ( i = 0; i < count; i++ )
+ {
+ exported_str[j++]=safe_str[i];
+
+ if ( !safe_str[i] ) break;
+ putchar(prev = safe_str[i]);
+ }
+
+ if ( prev != '\n' ) putchar('\n');
+
+ spin_unlock_irqrestore(&console_lock, flags);
+
+ exported_str[j]='\0';
+ console_export(exported_str, j-1);
+
+ return(0);
+}
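+
+/* Example (illustrative): a guest in domain 2 writing the string "hello\n"
+ * via this hypercall produces "DOM2: hello" on the local console, and the
+ * same text is exported to domain 0 in a fake UDP packet by
+ * console_export() above. */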
+
+void __out_of_line_bug(int line)
+{
+ printk("kernel BUG in header file at line %d\n", line);
+ BUG();
+ for ( ; ; ) continue;
+}
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
new file mode 100644
index 0000000000..dcd1daa564
--- /dev/null
+++ b/xen/common/keyhandler.c
@@ -0,0 +1,130 @@
+#include <xeno/keyhandler.h>
+#include <xeno/reboot.h>
+
+extern void perfc_printall (u_char key, void *dev_id, struct pt_regs *regs);
+
+#define KEY_MAX 256
+#define STR_MAX 64
+
+typedef struct _key_te {
+ key_handler *handler;
+ char desc[STR_MAX];
+} key_te_t;
+
+static key_te_t key_table[KEY_MAX];
+
+void add_key_handler(u_char key, key_handler *handler, char *desc)
+{
+ if(key_table[key].handler != NULL)
+ printk("Warning: overwriting handler for key 0x%x\n", key);
+
+ key_table[key].handler = handler;
+
+ /* Copy the description, always NUL-terminating the stored copy. */
+ strncpy(key_table[key].desc, desc, STR_MAX);
+ key_table[key].desc[STR_MAX-1] = '\0';
+
+ return;
+}
+
+key_handler *get_key_handler(u_char key)
+{
+ return key_table[key].handler;
+}
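+
+/*
+ * Registration sketch (illustrative; say_hi() is hypothetical). A handler
+ * receives the key, a device id and the interrupted register state:
+ *
+ *   static void say_hi(u_char key, void *dev_id, struct pt_regs *regs)
+ *   {
+ *       printk("'%c' pressed -> hi!\n", key);
+ *   }
+ *
+ *   add_key_handler('x', say_hi, "print a greeting");
+ */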
+
+
+void show_handlers(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ int i;
+
+ printk("'%c' pressed -> showing installed handlers\n", key);
+ for(i=0; i < KEY_MAX; i++)
+ if(key_table[i].handler)
+ printk(" key '%c' (ascii '%02x') => %s\n",
+ (i<33 || i>126)?(' '):(i),i,
+ key_table[i].desc);
+ return;
+}
+
+
+void dump_registers(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ extern void show_registers(struct pt_regs *regs);
+
+ printk("'%c' pressed -> dumping registers\n", key);
+ show_registers(regs);
+ return;
+}
+
+void halt_machine(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ printk("'%c' pressed -> rebooting machine\n", key);
+ machine_restart(NULL);
+ return;
+}
+
+
+
+/* XXX SMH: this is keir's fault */
+static char *task_states[] =
+{
+ "Running",
+ "Interruptible Sleep",
+ "Uninterruptible Sleep",
+ NULL, "Stopped",
+ NULL, NULL, NULL, "Dying",
+};
+
+void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ u_long flags;
+ struct task_struct *p;
+ shared_info_t *s;
+
+ printk("'%c' pressed -> dumping task queues\n", key);
+ read_lock_irqsave(&tasklist_lock, flags);
+ p = &idle0_task;
+ do {
+ printk("Xen: DOM %d, CPU %d [has=%c], state = %s, "
+ "hyp_events = %08x\n",
+ p->domain, p->processor, p->has_cpu ? 'T':'F',
+ task_states[p->state], p->hyp_events);
+ s = p->shared_info;
+ if(!is_idle_task(p)) {
+ printk("Guest: events = %08lx, event_enable = %08lx\n",
+ s->events, s->events_enable);
+ printk("Notifying guest...\n");
+ set_bit(_EVENT_DEBUG, &s->events);
+ }
+ } while ( (p = p->next_task) != &idle0_task );
+ read_unlock_irqrestore(&tasklist_lock, flags);
+}
+
+
+void initialize_keytable(void)
+{
+ int i;
+
+ /* first initialize key handler table */
+ for(i = 0; i < KEY_MAX; i++)
+ key_table[i].handler = (key_handler *)NULL;
+
+ /* setup own handlers */
+ add_key_handler('d', dump_registers, "dump registers");
+ add_key_handler('h', show_handlers, "show this message");
+ add_key_handler('p', perfc_printall, "print performance counters");
+ add_key_handler('q', do_task_queues, "dump task queues + guest state");
+ add_key_handler('R', halt_machine, "reboot machine ungracefully");
+
+ return;
+}
diff --git a/xen/common/lib.c b/xen/common/lib.c
new file mode 100644
index 0000000000..ae75196ffd
--- /dev/null
+++ b/xen/common/lib.c
@@ -0,0 +1,526 @@
+
+#include <xeno/ctype.h>
+#include <xeno/lib.h>
+
+int memcmp(const void * cs,const void * ct,size_t count)
+{
+ const unsigned char *su1, *su2;
+ signed char res = 0;
+
+ for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
+ if ((res = *su1 - *su2) != 0)
+ break;
+ return res;
+}
+
+void * memcpy(void * dest,const void *src,size_t count)
+{
+ char *tmp = (char *) dest, *s = (char *) src;
+
+ while (count--)
+ *tmp++ = *s++;
+
+ return dest;
+}
+
+int strncmp(const char * cs,const char * ct,size_t count)
+{
+ register signed char __res = 0;
+
+ while (count) {
+ if ((__res = *cs - *ct++) != 0 || !*cs++)
+ break;
+ count--;
+ }
+
+ return __res;
+}
+
+int strcmp(const char * cs,const char * ct)
+{
+ register signed char __res;
+
+ while (1) {
+ if ((__res = *cs - *ct++) != 0 || !*cs++)
+ break;
+ }
+
+ return __res;
+}
+
+char * strcpy(char * dest,const char *src)
+{
+ char *tmp = dest;
+
+ while ((*dest++ = *src++) != '\0')
+ /* nothing */;
+ return tmp;
+}
+
+char * strncpy(char * dest,const char *src,size_t count)
+{
+ char *tmp = dest;
+
+ while (count-- && (*dest++ = *src++) != '\0')
+ /* nothing */;
+
+ return tmp;
+}
+
+void * memset(void * s,int c,size_t count)
+{
+ char *xs = (char *) s;
+
+ while (count--)
+ *xs++ = c;
+
+ return s;
+}
+
+size_t strnlen(const char * s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+size_t strlen(const char * s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+char * strchr(const char * s, int c)
+{
+ for(; *s != (char) c; ++s)
+ if (*s == '\0')
+ return NULL;
+ return (char *) s;
+}
+
+char * strstr(const char * s1,const char * s2)
+{
+ int l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *) s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1,s2,l2))
+ return (char *) s1;
+ s1++;
+ }
+ return NULL;
+}
+
+
+/* for xeno/ctype.h */
+unsigned char _ctype[] = {
+_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */
+_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */
+_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */
+_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */
+_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */
+_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */
+_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */
+_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */
+_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */
+_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */
+_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */
+_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */
+
+
+unsigned long str_to_quad(unsigned char *s)
+{
+ unsigned long quad = 0;
+ do {
+ quad <<= 8;
+ quad |= simple_strtol(s, (char **)&s, 10);
+ }
+ while ( *s++ == '.' );
+ return quad;
+}
+
+
+unsigned char *quad_to_str(unsigned long q, unsigned char *s)
+{
+ sprintf(s, "%ld.%ld.%ld.%ld",
+ (q>>24)&255, (q>>16)&255, (q>>8)&255, (q>>0)&255);
+ return s;
+}
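+
+/* Example (illustrative): str_to_quad("128.232.0.1") returns 0x80e80001,
+ * and quad_to_str(0x80e80001, buf) writes "128.232.0.1" back into buf. */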
+
+
+/* a couple of 64 bit operations ported from freebsd */
+
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
+ */
+
+#include <asm/types.h>
+
+
+/*
+ * Depending on the desired operation, we view a `long long' (aka quad_t) in
+ * one or more of the following formats.
+ */
+union uu {
+ s64 q; /* as a (signed) quad */
+ s64 uq; /* as an unsigned quad */
+ long sl[2]; /* as two signed longs */
+ unsigned long ul[2]; /* as two unsigned longs */
+};
+/* XXX RN: Yuck, hardcoded endianness :) */
+#define _QUAD_HIGHWORD 1
+#define _QUAD_LOWWORD 0
+/*
+ * Define high and low longwords.
+ */
+#define H _QUAD_HIGHWORD
+#define L _QUAD_LOWWORD
+
+/*
+ * Total number of bits in a quad_t and in the pieces that make it up.
+ * These are used for shifting, and also below for halfword extraction
+ * and assembly.
+ */
+#define CHAR_BIT 8 /* number of bits in a char */
+#define QUAD_BITS (sizeof(s64) * CHAR_BIT)
+#define LONG_BITS (sizeof(long) * CHAR_BIT)
+#define HALF_BITS (sizeof(long) * CHAR_BIT / 2)
+
+/*
+ * Extract high and low shortwords from longword, and move low shortword of
+ * longword to upper half of long, i.e., produce the upper longword of
+ * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.)
+ *
+ * These are used in the multiply code, to split a longword into upper
+ * and lower halves, and to reassemble a product as a quad_t, shifted left
+ * (sizeof(long)*CHAR_BIT/2).
+ */
+#define HHALF(x) ((x) >> HALF_BITS)
+#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1))
+#define LHUP(x) ((x) << HALF_BITS)
+
+/*
+ * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
+ * section 4.3.1, pp. 257--259.
+ */
+#define B (1 << HALF_BITS) /* digit base */
+
+/* Combine two `digits' to make a single two-digit number. */
+#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
+
+/* select a type for digits in base B: use unsigned short if they fit */
+#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff
+typedef unsigned short digit;
+#else
+typedef u_long digit;
+#endif
+
+/*
+ * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
+ * `fall out' the left (there never will be any such anyway).
+ * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
+ */
+static void
+shl(register digit *p, register int len, register int sh)
+{
+ register int i;
+
+ for (i = 0; i < len; i++)
+ p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
+ p[i] = LHALF(p[i] << sh);
+}
+
+/*
+ * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
+ *
+ * We do this in base 2-sup-HALF_BITS, so that all intermediate products
+ * fit within u_long. As a consequence, the maximum length dividend and
+ * divisor are 4 `digits' in this base (they are shorter if they have
+ * leading zeros).
+ */
+u64
+__qdivrem(u64 uq, u64 vq, u64 *arq)
+{
+ union uu tmp;
+ digit *u, *v, *q;
+ register digit v1, v2;
+ u_long qhat, rhat, t;
+ int m, n, d, j, i;
+ digit uspace[5], vspace[5], qspace[5];
+
+ /*
+ * Take care of special cases: divide by zero, and u < v.
+ */
+ if (vq == 0) {
+ /* divide by zero. */
+ static volatile const unsigned int zero = 0;
+
+ tmp.ul[H] = tmp.ul[L] = 1 / zero;
+ if (arq)
+ *arq = uq;
+ return (tmp.q);
+ }
+ if (uq < vq) {
+ if (arq)
+ *arq = uq;
+ return (0);
+ }
+ u = &uspace[0];
+ v = &vspace[0];
+ q = &qspace[0];
+
+ /*
+ * Break dividend and divisor into digits in base B, then
+ * count leading zeros to determine m and n. When done, we
+ * will have:
+ * u = (u[1]u[2]...u[m+n]) sub B
+ * v = (v[1]v[2]...v[n]) sub B
+ * v[1] != 0
+ * 1 < n <= 4 (if n = 1, we use a different division algorithm)
+ * m >= 0 (otherwise u < v, which we already checked)
+ * m + n = 4
+ * and thus
+ * m = 4 - n <= 2
+ */
+ tmp.uq = uq;
+ u[0] = 0;
+ u[1] = HHALF(tmp.ul[H]);
+ u[2] = LHALF(tmp.ul[H]);
+ u[3] = HHALF(tmp.ul[L]);
+ u[4] = LHALF(tmp.ul[L]);
+ tmp.uq = vq;
+ v[1] = HHALF(tmp.ul[H]);
+ v[2] = LHALF(tmp.ul[H]);
+ v[3] = HHALF(tmp.ul[L]);
+ v[4] = LHALF(tmp.ul[L]);
+ for (n = 4; v[1] == 0; v++) {
+ if (--n == 1) {
+ u_long rbj; /* r*B+u[j] (not root boy jim) */
+ digit q1, q2, q3, q4;
+
+ /*
+ * Change of plan, per exercise 16.
+ * r = 0;
+ * for j = 1..4:
+ * q[j] = floor((r*B + u[j]) / v),
+ * r = (r*B + u[j]) % v;
+ * We unroll this completely here.
+ */
+ t = v[2]; /* nonzero, by definition */
+ q1 = u[1] / t;
+ rbj = COMBINE(u[1] % t, u[2]);
+ q2 = rbj / t;
+ rbj = COMBINE(rbj % t, u[3]);
+ q3 = rbj / t;
+ rbj = COMBINE(rbj % t, u[4]);
+ q4 = rbj / t;
+ if (arq)
+ *arq = rbj % t;
+ tmp.ul[H] = COMBINE(q1, q2);
+ tmp.ul[L] = COMBINE(q3, q4);
+ return (tmp.q);
+ }
+ }
+
+ /*
+ * By adjusting q once we determine m, we can guarantee that
+ * there is a complete four-digit quotient at &qspace[1] when
+ * we finally stop.
+ */
+ for (m = 4 - n; u[1] == 0; u++)
+ m--;
+ for (i = 4 - m; --i >= 0;)
+ q[i] = 0;
+ q += 4 - m;
+
+ /*
+ * Here we run Program D, translated from MIX to C and acquiring
+ * a few minor changes.
+ *
+ * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
+ */
+ d = 0;
+ for (t = v[1]; t < B / 2; t <<= 1)
+ d++;
+ if (d > 0) {
+ shl(&u[0], m + n, d); /* u <<= d */
+ shl(&v[1], n - 1, d); /* v <<= d */
+ }
+ /*
+ * D2: j = 0.
+ */
+ j = 0;
+ v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
+ v2 = v[2]; /* for D3 */
+ do {
+ register digit uj0, uj1, uj2;
+
+ /*
+ * D3: Calculate qhat (\^q, in TeX notation).
+ * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
+ * let rhat = (u[j]*B + u[j+1]) mod v[1].
+ * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
+ * decrement qhat and increase rhat correspondingly.
+ * Note that if rhat >= B, v[2]*qhat < rhat*B.
+ */
+ uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
+ uj1 = u[j + 1]; /* for D3 only */
+ uj2 = u[j + 2]; /* for D3 only */
+ if (uj0 == v1) {
+ qhat = B;
+ rhat = uj1;
+ goto qhat_too_big;
+ } else {
+ u_long nn = COMBINE(uj0, uj1);
+ qhat = nn / v1;
+ rhat = nn % v1;
+ }
+ while (v2 * qhat > COMBINE(rhat, uj2)) {
+ qhat_too_big:
+ qhat--;
+ if ((rhat += v1) >= B)
+ break;
+ }
+ /*
+ * D4: Multiply and subtract.
+ * The variable `t' holds any borrows across the loop.
+ * We split this up so that we do not require v[0] = 0,
+ * and to eliminate a final special case.
+ */
+ for (t = 0, i = n; i > 0; i--) {
+ t = u[i + j] - v[i] * qhat - t;
+ u[i + j] = LHALF(t);
+ t = (B - HHALF(t)) & (B - 1);
+ }
+ t = u[j] - t;
+ u[j] = LHALF(t);
+ /*
+ * D5: test remainder.
+ * There is a borrow if and only if HHALF(t) is nonzero;
+ * in that (rare) case, qhat was too large (by exactly 1).
+ * Fix it by adding v[1..n] to u[j..j+n].
+ */
+ if (HHALF(t)) {
+ qhat--;
+ for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
+ t += u[i + j] + v[i];
+ u[i + j] = LHALF(t);
+ t = HHALF(t);
+ }
+ u[j] = LHALF(u[j] + t);
+ }
+ q[j] = qhat;
+ } while (++j <= m); /* D7: loop on j. */
+
+ /*
+ * If caller wants the remainder, we have to calculate it as
+ * u[m..m+n] >> d (this is at most n digits and thus fits in
+ * u[m+1..m+n], but we may need more source digits).
+ */
+ if (arq) {
+ if (d) {
+ for (i = m + n; i > m; --i)
+ u[i] = (u[i] >> d) |
+ LHALF(u[i - 1] << (HALF_BITS - d));
+ u[i] = 0;
+ }
+ tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
+ tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
+ *arq = tmp.q;
+ }
+
+ tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
+ tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
+ return (tmp.q);
+}
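+
+/* Sanity-check example (illustrative):
+ *
+ *   u64 r, q = __qdivrem(0x100000000ULL, 10, &r);
+ *
+ * leaves q == 429496729 and r == 6, since 2^32 == 429496729*10 + 6. */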
+
+
+/*
+ * Divide two signed quads.
+ * ??? if -1/2 should produce -1 on this machine, this code is wrong
+ */
+s64
+__divdi3(s64 a, s64 b)
+{
+ u64 ua, ub, uq;
+ int neg;
+
+ if (a < 0)
+ ua = -(u64)a, neg = 1;
+ else
+ ua = a, neg = 0;
+ if (b < 0)
+ ub = -(u64)b, neg ^= 1;
+ else
+ ub = b;
+ uq = __qdivrem(ua, ub, (u64 *)0);
+ return (neg ? -uq : uq);
+}
+
+/*
+ * Divide two unsigned quads.
+ */
+u64
+__udivdi3(u64 a, u64 b)
+{
+ return (__qdivrem(a, b, (u64 *)0));
+}
diff --git a/xen/common/memory.c b/xen/common/memory.c
new file mode 100644
index 0000000000..0c534ad3d1
--- /dev/null
+++ b/xen/common/memory.c
@@ -0,0 +1,820 @@
+/******************************************************************************
+ * memory.c
+ *
+ * Copyright (c) 2002 K A Fraser
+ *
+ * A description of the page table API:
+ *
+ * Domains trap to process_page_updates with a list of update requests.
+ * This is a list of (ptr, val) pairs, where the requested operation
+ * is *ptr = val.
+ *
+ * Reference counting of pages:
+ * ----------------------------
+ * Each page has two refcounts: tot_count and type_count.
+ *
+ * TOT_COUNT is the obvious reference count. It counts all uses of a
+ * physical page frame by a domain, including uses as a page directory,
+ * a page table, or simple mappings via a PTE. This count prevents a
+ * domain from releasing a frame back to the hypervisor's free pool when
+ * it is still referencing it!
+ *
+ * TYPE_COUNT is more subtle. A frame can be put to one of three
+ * mutually-exclusive uses: it might be used as a page directory, or a
+ * page table, or it may be mapped writeable by the domain (a frame may,
+ * of course, also be used in none of these three ways).
+ * So, type_count is a count of the number of times a frame is being
+ * referred to in its current incarnation. Therefore, a page can only
+ * change its type when its type count is zero.
+ *
+ * Pinning the page type:
+ * ----------------------
+ * The type of a page can be pinned/unpinned with the commands
+ * PGEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
+ * pinning is not reference counted, so it can't be nested).
+ * This is useful to prevent a page's type count falling to zero, at which
+ * point safety checks would need to be carried out next time the count
+ * is increased again.
+ *
+ * A further note on writeable page mappings:
+ * ------------------------------------------
+ * For simplicity, the count of writeable mappings for a page may not
+ * correspond to reality. The 'writeable count' is incremented for every
+ * PTE which maps the page with the _PAGE_RW flag set. However, for
+ * write access to be possible the page directory entry must also have
+ * its _PAGE_RW bit set. We do not check this as it complicates the
+ * reference counting considerably [consider the case of multiple
+ * directory entries referencing a single page table, some with the RW
+ * bit set, others not -- it starts getting a bit messy].
+ * In normal use, this simplification shouldn't be a problem.
+ * However, the logic can be added if required.
+ *
+ * One more note on read-only page mappings:
+ * -----------------------------------------
+ * We want domains to be able to map pages for read-only access. The
+ * main reason is that page tables and directories should be readable
+ * by a domain, but it would not be safe for them to be writeable.
+ * However, domains have free access to rings 1 & 2 of the Intel
+ * privilege model. In terms of page protection, these are considered
+ * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
+ * read-only restrictions are respected in supervisor mode -- if the
+ * bit is clear then any mapped page is writeable.
+ *
+ * We get round this by always setting the WP bit and disallowing
+ * updates to it. This is very unlikely to cause a problem for guest
+ * OS's, which will generally use the WP bit to simplify copy-on-write
+ * implementation (in that case, OS wants a fault when it writes to
+ * an application-supplied buffer).
+ */
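+
+/* Example (illustrative): a frame in use as a page table referenced from
+ * two PDEs has type_count == 2 (both uses agree on PGT_l1_page_table) and
+ * tot_count >= 2. Before the domain may map that frame writeable, both
+ * references must go away: only when type_count reaches zero can the
+ * frame's type change to PGT_writeable_page. */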
+
+
+/*
+ * THE FOLLOWING ARE ISSUES IF GUEST OPERATING SYSTEMS BECOME SMP-CAPABLE.
+ * [THAT IS, THEY'RE NOT A PROBLEM NOW, AND MAY NOT EVER BE.]
+ * -----------------------------------------------------------------------
+ *
+ * *********
+ * UPDATE 15/7/02: Interface has changed --updates now specify physical
+ * address of page-table entry, rather than specifying a virtual address,
+ * so hypervisor no longer "walks" the page tables. Therefore the
+ * solution below cannot work. Another possibility is to add a new entry
+ * to our "struct page" which says to which top-level page table each
+ * lower-level page table or writeable mapping belongs. If it belongs to more
+ * than one, we'd probably just flush on all processors running the domain.
+ * *********
+ *
+ * ** 1 **
+ * The problem involves creating new page tables which might be mapped
+ * writeable in the TLB of another processor. As an example, a domain might be
+ * running in two contexts (ie. on two processors) simultaneously, using the
+ * same top-level page table in both contexts. Now, if context 1 sends an
+ * update request [make page P read-only, add a reference to page P as a page
+ * table], that will succeed if there was only one writeable mapping of P.
+ * However, that mapping may persist in the TLB of context 2.
+ *
+ * Solution: when installing a new page table, we must flush foreign TLBs as
+ * necessary. Naive solution is to flush on any processor running our domain.
+ * Cleverer solution is to flush on any processor running same top-level page
+ * table, but this will sometimes fail (consider two different top-level page
+ * tables which have a shared lower-level page table).
+ *
+ * A better solution: when squashing a write reference, check how many times
+ * that lowest-level table entry is referenced by ORing refcounts of tables
+ * down the page-table hierarchy. If the result is != 1, we require flushing
+ * instances of current domain if a new table is installed (because the
+ * lowest-level entry may be referenced by many top-level page tables).
+ * However, common case will be that result == 1, so we only need to flush
+ * processors with the same top-level page table. Make choice at
+ * table-installation time based on a `flush_level' flag, which is
+ * FLUSH_NONE, FLUSH_PAGETABLE, FLUSH_DOMAIN. A flush reduces this
+ * to FLUSH_NONE, while squashed write mappings can only promote up
+ * to more aggressive flush types.
+ *
+ * ** 2 **
+ * Same problem occurs when removing a page table, at level 1 say, then
+ * making it writeable. Need a TLB flush between otherwise another processor
+ * might write an illegal mapping into the old table, while yet another
+ * processor can use the illegal mapping because of a stale level-2 TLB
+ * entry. So, removal of a table reference sets 'flush_level' appropriately,
+ * and a flush occurs on next addition of a fresh write mapping.
+ *
+ * BETTER SOLUTION FOR BOTH 1 AND 2:
+ * When type_refcnt goes to zero, leave old type in place (don't set to
+ * PGT_none). Then, only flush if making a page table of a page with
+ * (cnt=0,type=PGT_writeable), or when adding a write mapping for a page
+ * with (cnt=0, type=PGT_pagexxx). A TLB flush will cause all pages
+ * with refcnt==0 to be reset to PGT_none. Need an array for the purpose,
+ * added to when a type_refcnt goes to zero, and emptied on a TLB flush.
+ * Either have per-domain table, or force TLB flush at end of each
+ * call to 'process_page_updates'.
+ * Most OSes will always keep a writeable reference hanging around, and
+ * page table structure is fairly static, so this mechanism should be
+ * fairly cheap.
+ *
+ * MAYBE EVEN BETTER? [somewhat dubious: not for first cut of the code]:
+ * If we need to force an intermediate flush, those other processors
+ * spin until we complete, then do a single TLB flush. They can spin on
+ * the lock protecting 'process_page_updates', and continue when that
+ * is freed. Saves cost of setting up and servicing an IPI: later
+ * communication is synchronous. Processors trying to install the domain
+ * or domain&pagetable would also enter the spin.
+ *
+ * ** 3 **
+ * Indeed, this problem generalises to reusing page tables at different
+ * levels of the hierarchy (conceptually, the guest OS can use the
+ * hypervisor to introduce illegal table entries by proxy). Consider
+ * unlinking a level-1 page table and reintroducing at level 2 with no
+ * TLB flush. Hypervisor can add a reference to some other level-1 table
+ * with the RW bit set. This is fine in the level-2 context, but some
+ * other processor may still be using that table in level-1 context
+ * (due to a stale TLB entry). At level 1 it may look like the
+ * processor has write access to the other level-1 page table! Therefore
+ * can add illegal values there with impunity :-(
+ *
+ * Fortunately, the solution above generalises to this extended problem.
+ */
+
+/*
+ * UPDATE 12.11.02.: We no longer have struct page and mem_map. These
+ * have been replaced by struct pfn_info and frame_table respectively.
+ *
+ * system_free_list is a list_head linking all system owned free pages.
+ * it is initialized in init_frametable.
+ *
+ * Boris Dragovic.
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/mm.h>
+#include <xeno/sched.h>
+#include <xeno/errno.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/domain_page.h>
+
+#if 0
+#define MEM_LOG(_f, _a...) printk("DOM%d: (file=memory.c, line=%d) " _f "\n", current->domain, __LINE__, ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+/* Domain 0 is allowed to submit requests on behalf of others. */
+#define DOMAIN_OKAY(_f) \
+ ((((_f) & PG_domain_mask) == current->domain) || (current->domain == 0))
+
+/* 'get' checks parameter for validity before inc'ing refcnt. */
+static int get_l2_table(unsigned long page_nr);
+static int get_l1_table(unsigned long page_nr);
+static int get_page(unsigned long page_nr, int writeable);
+static int inc_page_refcnt(unsigned long page_nr, unsigned int type);
+/* 'put' does no checking because if refcnt not zero, entity must be valid. */
+static void put_l2_table(unsigned long page_nr);
+static void put_l1_table(unsigned long page_nr);
+static void put_page(unsigned long page_nr, int writeable);
+static int dec_page_refcnt(unsigned long page_nr, unsigned int type);
+
+static int mod_l2_entry(unsigned long, l2_pgentry_t);
+static int mod_l1_entry(unsigned long, l1_pgentry_t);
+
+/* The frame table, its total size in bytes, and the machine's page count. */
+frame_table_t * frame_table;
+unsigned long frame_table_size;
+unsigned long max_page;
+
+struct list_head free_list;
+spinlock_t free_list_lock = SPIN_LOCK_UNLOCKED;
+unsigned int free_pfns;
+
+static int tlb_flush[NR_CPUS];
+
+/*
+ * init_frametable:
+ * Initialise per-frame memory information. This goes directly after
+ * MAX_MONITOR_ADDRESS in physical memory.
+ */
+void __init init_frametable(unsigned long nr_pages)
+{
+ struct pfn_info *pf;
+ unsigned long page_index;
+ unsigned long flags;
+
+ memset(tlb_flush, 0, sizeof(tlb_flush));
+
+ max_page = nr_pages;
+ frame_table_size = nr_pages * sizeof(struct pfn_info);
+ frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
+ frame_table = (frame_table_t *)FRAMETABLE_VIRT_START;
+ memset(frame_table, 0, frame_table_size);
+
+ free_pfns = 0;
+
+ /* Put all domain-allocatable memory on a free list. */
+ spin_lock_irqsave(&free_list_lock, flags);
+ INIT_LIST_HEAD(&free_list);
+ for( page_index = (__pa(frame_table) + frame_table_size) >> PAGE_SHIFT;
+ page_index < nr_pages;
+ page_index++ )
+ {
+ pf = list_entry(&frame_table[page_index].list, struct pfn_info, list);
+ list_add_tail(&pf->list, &free_list);
+ free_pfns++;
+ }
+ spin_unlock_irqrestore(&free_list_lock, flags);
+}
+
+
+/* Return original refcnt, or -1 on error. */
+static int inc_page_refcnt(unsigned long page_nr, unsigned int type)
+{
+ struct pfn_info *page;
+ unsigned long flags;
+
+ if ( page_nr >= max_page )
+ {
+ MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+ return -1;
+ }
+ page = frame_table + page_nr;
+ flags = page->flags;
+ if ( !DOMAIN_OKAY(flags) )
+ {
+ MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
+ return -1;
+ }
+ if ( (flags & PG_type_mask) != type )
+ {
+ if ( page_type_count(page) != 0 )
+ {
+ MEM_LOG("Page %08lx bad type/count (%08lx!=%08x) cnt=%ld",
+ page_nr << PAGE_SHIFT,
+ flags & PG_type_mask, type, page_type_count(page));
+ return -1;
+ }
+
+ page->flags |= type;
+ }
+
+ get_page_tot(page);
+ return get_page_type(page);
+}
+
+/* Return new refcnt, or -1 on error. */
+static int dec_page_refcnt(unsigned long page_nr, unsigned int type)
+{
+ struct pfn_info *page;
+ int ret;
+
+ if ( page_nr >= max_page )
+ {
+ MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+ return -1;
+ }
+ page = frame_table + page_nr;
+ if ( !DOMAIN_OKAY(page->flags) ||
+ ((page->flags & PG_type_mask) != type) )
+ {
+ MEM_LOG("Bad page type/domain (dom=%ld) (type %ld != expected %d)",
+ page->flags & PG_domain_mask, page->flags & PG_type_mask,
+ type);
+ return -1;
+ }
+ ASSERT(page_type_count(page) != 0);
+ if ( (ret = put_page_type(page)) == 0 ) page->flags &= ~PG_type_mask;
+ put_page_tot(page);
+ return ret;
+}
+
+
+/* We allow a L2 table to map itself, to achieve a linear pagetable. */
+/* NB. There's no need for a put_twisted_l2_table() function!! */
+static int get_twisted_l2_table(unsigned long entry_pfn, l2_pgentry_t l2e)
+{
+ unsigned long l2v = l2_pgentry_val(l2e);
+
+ /* Clearly the mapping must be read-only :-) */
+ if ( (l2v & _PAGE_RW) )
+ {
+ MEM_LOG("Attempt to install twisted L2 entry with write permissions");
+ return -1;
+ }
+
+ /* This is a sufficient final check. */
+ if ( (l2v >> PAGE_SHIFT) != entry_pfn )
+ {
+ MEM_LOG("L2 tables may not map _other_ L2 tables!\n");
+ return -1;
+ }
+
+ /* We don't bump the reference counts. */
+ return 0;
+}
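+
+/* Example (illustrative): installing the read-only self-mapping
+ *
+ *   l2tab[i] = mk_l2_pgentry((l2_pfn << PAGE_SHIFT) | _PAGE_PRESENT);
+ *
+ * (where l2_pfn is the L2 table's own frame number) makes every page table
+ * of the address space appear as ordinary data pages within a 4MB window
+ * of virtual memory -- the classic linear-pagetable trick. */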
+
+
+static int get_l2_table(unsigned long page_nr)
+{
+ l2_pgentry_t *p_l2_entry, l2_entry;
+ int i, ret=0;
+
+ ret = inc_page_refcnt(page_nr, PGT_l2_page_table);
+ if ( ret != 0 ) return (ret < 0) ? ret : 0;
+
+ /* NEW level-2 page table! Deal with every PDE in the table. */
+ p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
+ for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+ {
+ l2_entry = *p_l2_entry++;
+ if ( !(l2_pgentry_val(l2_entry) & _PAGE_PRESENT) ) continue;
+ if ( (l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
+ {
+ MEM_LOG("Bad L2 page type settings %04lx",
+ l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE));
+ ret = -1;
+ goto out;
+ }
+ /* Assume we're mapping an L1 table, falling back to twisted L2. */
+ ret = get_l1_table(l2_pgentry_to_pagenr(l2_entry));
+ if ( ret ) ret = get_twisted_l2_table(page_nr, l2_entry);
+ if ( ret ) goto out;
+ }
+
+ /* Now we simply slap in our high mapping. */
+ memcpy(p_l2_entry,
+ idle_pg_table[smp_processor_id()] + DOMAIN_ENTRIES_PER_L2_PAGETABLE,
+ HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+ p_l2_entry[(PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT) -
+ DOMAIN_ENTRIES_PER_L2_PAGETABLE] =
+ mk_l2_pgentry(__pa(current->mm.perdomain_pt) | __PAGE_HYPERVISOR);
+
+ out:
+ unmap_domain_mem(p_l2_entry);
+ return ret;
+}
+
+static int get_l1_table(unsigned long page_nr)
+{
+ l1_pgentry_t *p_l1_entry, l1_entry;
+ int i, ret;
+
+ /* Update ref count for page pointed at by PDE. */
+ ret = inc_page_refcnt(page_nr, PGT_l1_page_table);
+ if ( ret != 0 ) return (ret < 0) ? ret : 0;
+
+ /* NEW level-1 page table! Deal with every PTE in the table. */
+ p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
+ for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+ {
+ l1_entry = *p_l1_entry++;
+ if ( !(l1_pgentry_val(l1_entry) & _PAGE_PRESENT) ) continue;
+ if ( (l1_pgentry_val(l1_entry) &
+ (_PAGE_GLOBAL|_PAGE_PAT)) )
+ {
+ MEM_LOG("Bad L1 page type settings %04lx",
+ l1_pgentry_val(l1_entry) &
+ (_PAGE_GLOBAL|_PAGE_PAT));
+ ret = -1;
+ goto out;
+ }
+ ret = get_page(l1_pgentry_to_pagenr(l1_entry),
+ l1_pgentry_val(l1_entry) & _PAGE_RW);
+ if ( ret ) goto out;
+ }
+
+ out:
+ /* Make sure we unmap the right page! */
+ unmap_domain_mem(p_l1_entry-1);
+ return ret;
+}
+
+static int get_page(unsigned long page_nr, int writeable)
+{
+ struct pfn_info *page;
+ unsigned long flags;
+
+ /* Update ref count for page pointed at by PTE. */
+ if ( page_nr >= max_page )
+ {
+ MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+ return(-1);
+ }
+ page = frame_table + page_nr;
+ flags = page->flags;
+ if ( !DOMAIN_OKAY(flags) )
+ {
+ MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
+ return(-1);
+ }
+
+ if ( writeable )
+ {
+ if ( (flags & PG_type_mask) != PGT_writeable_page )
+ {
+ if ( page_type_count(page) != 0 )
+ {
+ MEM_LOG("Bad page type/count (%08lx!=%08x) cnt=%ld",
+ flags & PG_type_mask, PGT_writeable_page,
+ page_type_count(page));
+ return(-1);
+ }
+ page->flags |= PGT_writeable_page;
+ }
+ get_page_type(page);
+ }
+
+ get_page_tot(page);
+
+ return(0);
+}
+
+static void put_l2_table(unsigned long page_nr)
+{
+ l2_pgentry_t *p_l2_entry, l2_entry;
+ int i;
+
+ if ( dec_page_refcnt(page_nr, PGT_l2_page_table) ) return;
+
+ /* We had last reference to level-2 page table. Free the PDEs. */
+ p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
+ for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+ {
+ l2_entry = *p_l2_entry++;
+ if ( (l2_pgentry_val(l2_entry) & _PAGE_PRESENT) )
+ put_l1_table(l2_pgentry_to_pagenr(l2_entry));
+ }
+
+ unmap_domain_mem(p_l2_entry);
+}
+
+static void put_l1_table(unsigned long page_nr)
+{
+ l1_pgentry_t *p_l1_entry, l1_entry;
+ int i;
+
+ if ( dec_page_refcnt(page_nr, PGT_l1_page_table) ) return;
+
+ /* We had last reference to level-1 page table. Free the PTEs. */
+ p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
+ for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+ {
+ l1_entry = *p_l1_entry++;
+ if ( (l1_pgentry_val(l1_entry) & _PAGE_PRESENT) )
+ {
+ put_page(l1_pgentry_to_pagenr(l1_entry),
+ l1_pgentry_val(l1_entry) & _PAGE_RW);
+ }
+ }
+
+ /* Make sure we unmap the right page! */
+ unmap_domain_mem(p_l1_entry-1);
+}
+
+static void put_page(unsigned long page_nr, int writeable)
+{
+ struct pfn_info *page;
+ ASSERT(page_nr < max_page);
+ page = frame_table + page_nr;
+ ASSERT(DOMAIN_OKAY(page->flags));
+ ASSERT((!writeable) ||
+ ((page_type_count(page) != 0) &&
+ ((page->flags & PG_type_mask) == PGT_writeable_page)));
+ if ( writeable && (put_page_type(page) == 0) )
+ {
+ tlb_flush[smp_processor_id()] = 1;
+ page->flags &= ~PG_type_mask;
+ }
+ put_page_tot(page);
+}
+
+
+static int mod_l2_entry(unsigned long pa, l2_pgentry_t new_l2_entry)
+{
+ l2_pgentry_t *p_l2_entry, old_l2_entry;
+
+ p_l2_entry = map_domain_mem(pa);
+ old_l2_entry = *p_l2_entry;
+
+ if ( (((unsigned long)p_l2_entry & (PAGE_SIZE-1)) >> 2) >=
+ DOMAIN_ENTRIES_PER_L2_PAGETABLE )
+ {
+ MEM_LOG("Illegal L2 update attempt in hypervisor area %p",
+ p_l2_entry);
+ goto fail;
+ }
+
+ if ( (l2_pgentry_val(new_l2_entry) & _PAGE_PRESENT) )
+ {
+ if ( (l2_pgentry_val(new_l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
+ {
+ MEM_LOG("Bad L2 entry val %04lx",
+ l2_pgentry_val(new_l2_entry) &
+ (_PAGE_GLOBAL|_PAGE_PSE));
+ goto fail;
+ }
+ /* Differ in mapping (bits 12-31) or presence (bit 0)? */
+ if ( ((l2_pgentry_val(old_l2_entry) ^
+ l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 )
+ {
+ if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
+ {
+ put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
+ }
+
+ /* Assume we're mapping an L1 table, falling back to twisted L2. */
+ if ( get_l1_table(l2_pgentry_to_pagenr(new_l2_entry)) &&
+ get_twisted_l2_table(pa >> PAGE_SHIFT, new_l2_entry) )
+ goto fail;
+ }
+ }
+ else if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
+ {
+ put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
+ }
+
+ *p_l2_entry = new_l2_entry;
+ unmap_domain_mem(p_l2_entry);
+ return 0;
+
+ fail:
+ unmap_domain_mem(p_l2_entry);
+ return -1;
+}
+
+
+static int mod_l1_entry(unsigned long pa, l1_pgentry_t new_l1_entry)
+{
+ l1_pgentry_t *p_l1_entry, old_l1_entry;
+
+ p_l1_entry = map_domain_mem(pa);
+ old_l1_entry = *p_l1_entry;
+
+ if ( (l1_pgentry_val(new_l1_entry) & _PAGE_PRESENT) )
+ {
+ if ( (l1_pgentry_val(new_l1_entry) &
+ (_PAGE_GLOBAL|_PAGE_PAT)) )
+ {
+ MEM_LOG("Bad L1 entry val %04lx",
+ l1_pgentry_val(new_l1_entry) &
+ (_PAGE_GLOBAL|_PAGE_PAT));
+ goto fail;
+ }
+ /*
+ * Differ in mapping (bits 12-31), writeable (bit 1), or
+ * presence (bit 0)?
+ */
+ if ( ((l1_pgentry_val(old_l1_entry) ^
+ l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 )
+ {
+ if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
+ {
+ put_page(l1_pgentry_to_pagenr(old_l1_entry),
+ l1_pgentry_val(old_l1_entry) & _PAGE_RW);
+ }
+
+ if ( get_page(l1_pgentry_to_pagenr(new_l1_entry),
+ l1_pgentry_val(new_l1_entry) & _PAGE_RW) ){
+ goto fail;
+ }
+ }
+ }
+ else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
+ {
+ put_page(l1_pgentry_to_pagenr(old_l1_entry),
+ l1_pgentry_val(old_l1_entry) & _PAGE_RW);
+ }
+
+ *p_l1_entry = new_l1_entry;
+ unmap_domain_mem(p_l1_entry);
+ return 0;
+
+ fail:
+ unmap_domain_mem(p_l1_entry);
+ return -1;
+}
+
+
+static int do_extended_command(unsigned long ptr, unsigned long val)
+{
+ int err = 0;
+ unsigned long pfn = ptr >> PAGE_SHIFT;
+ struct pfn_info *page = frame_table + pfn;
+
+ switch ( (val & PGEXT_CMD_MASK) )
+ {
+ case PGEXT_PIN_L1_TABLE:
+ err = get_l1_table(pfn);
+ goto mark_as_pinned;
+ case PGEXT_PIN_L2_TABLE:
+ err = get_l2_table(pfn);
+ mark_as_pinned:
+ if ( err )
+ {
+ MEM_LOG("Error while pinning pfn %08lx", pfn);
+ break;
+ }
+ put_page_type(page);
+ put_page_tot(page);
+ if ( !(page->type_count & REFCNT_PIN_BIT) )
+ {
+ page->type_count |= REFCNT_PIN_BIT;
+ page->tot_count |= REFCNT_PIN_BIT;
+ }
+ else
+ {
+ MEM_LOG("Pfn %08lx already pinned", pfn);
+ err = 1;
+ }
+ break;
+
+ case PGEXT_UNPIN_TABLE:
+ if ( !DOMAIN_OKAY(page->flags) )
+ {
+ err = 1;
+ MEM_LOG("Page %08lx bad domain (dom=%ld)",
+ ptr, page->flags & PG_domain_mask);
+ }
+ else if ( (page->type_count & REFCNT_PIN_BIT) )
+ {
+ page->type_count &= ~REFCNT_PIN_BIT;
+ page->tot_count &= ~REFCNT_PIN_BIT;
+ get_page_type(page);
+ get_page_tot(page);
+ ((page->flags & PG_type_mask) == PGT_l1_page_table) ?
+ put_l1_table(pfn) : put_l2_table(pfn);
+ }
+ else
+ {
+ err = 1;
+ MEM_LOG("Pfn %08lx not pinned", pfn);
+ }
+ break;
+
+ case PGEXT_NEW_BASEPTR:
+ err = get_l2_table(pfn);
+ if ( !err )
+ {
+ put_l2_table(pagetable_val(current->mm.pagetable) >> PAGE_SHIFT);
+ current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
+ }
+ else
+ {
+ MEM_LOG("Error while installing new baseptr %08lx %d", ptr, err);
+ }
+ /* fall through */
+
+ case PGEXT_TLB_FLUSH:
+ tlb_flush[smp_processor_id()] = 1;
+ break;
+
+ case PGEXT_INVLPG:
+ __flush_tlb_one(val & ~PGEXT_CMD_MASK);
+ break;
+
+ default:
+ MEM_LOG("Invalid extended pt command 0x%08lx", val & PGEXT_CMD_MASK);
+ err = 1;
+ break;
+ }
+
+ return err;
+}
+
+int do_process_page_updates(page_update_request_t *ureqs, int count)
+{
+ page_update_request_t req;
+ unsigned long flags, pfn;
+ struct pfn_info *page;
+ int err = 0, i;
+
+ for ( i = 0; i < count; i++ )
+ {
+ if ( copy_from_user(&req, ureqs, sizeof(req)) )
+ {
+ kill_domain_with_errmsg("Cannot read page update request");
+ }
+
+ pfn = req.ptr >> PAGE_SHIFT;
+ if ( pfn >= max_page )
+ {
+ MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
+ kill_domain_with_errmsg("Page update request out of range");
+ }
+
+ err = 1;
+
+ /* Least significant bits of 'ptr' demux the operation type. */
+ switch ( req.ptr & (sizeof(l1_pgentry_t)-1) )
+ {
+ /*
+ * PGREQ_NORMAL: Normal update to any level of page table.
+ */
+ case PGREQ_NORMAL:
+ page = frame_table + pfn;
+ flags = page->flags;
+
+ if ( DOMAIN_OKAY(flags) )
+ {
+ switch ( (flags & PG_type_mask) )
+ {
+ case PGT_l1_page_table:
+ err = mod_l1_entry(req.ptr, mk_l1_pgentry(req.val));
+ break;
+ case PGT_l2_page_table:
+ err = mod_l2_entry(req.ptr, mk_l2_pgentry(req.val));
+ break;
+ default:
+ MEM_LOG("Update to non-pt page %08lx", req.ptr);
+ break;
+ }
+ }
+ else
+ {
+ MEM_LOG("Bad domain normal update (dom %d, pfn %ld)",
+ current->domain, pfn);
+ }
+ break;
+
+ case PGREQ_MPT_UPDATE:
+ page = frame_table + pfn;
+ if ( DOMAIN_OKAY(page->flags) )
+ {
+ machine_to_phys_mapping[pfn] = req.val;
+ err = 0;
+ }
+ else
+ {
+ MEM_LOG("Bad domain MPT update (dom %d, pfn %ld)",
+ current->domain, pfn);
+ }
+ break;
+
+ /*
+ * PGREQ_EXTENDED_COMMAND: Extended command is specified
+ * in the least-significant bits of the 'value' field.
+ */
+ case PGREQ_EXTENDED_COMMAND:
+ req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
+ err = do_extended_command(req.ptr, req.val);
+ break;
+
+ case PGREQ_UNCHECKED_UPDATE:
+ req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
+ if ( current->domain == 0 )
+ {
+ unsigned long *ptr = map_domain_mem(req.ptr);
+ *ptr = req.val;
+ unmap_domain_mem(ptr);
+ err = 0;
+ }
+ else
+ {
+ MEM_LOG("Bad unchecked update attempt");
+ }
+ break;
+
+ default:
+ MEM_LOG("Invalid page update command %08lx", req.ptr);
+ break;
+ }
+
+ if ( err )
+ {
+ kill_domain_with_errmsg("Illegal page update request");
+ }
+
+ ureqs++;
+ }
+
+ if ( tlb_flush[smp_processor_id()] )
+ {
+ tlb_flush[smp_processor_id()] = 0;
+ __write_cr3_counted(pagetable_val(current->mm.pagetable));
+ }
+
+ return(0);
+}
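+
+/*
+ * Illustrative guest-side sketch (the hypercall wrapper name below is
+ * hypothetical; only the request encoding is taken from the code above).
+ * Pinning the L2 table at machine address l2_ma looks like:
+ *
+ *     page_update_request_t req;
+ *     req.ptr = (l2_ma & PAGE_MASK) | PGREQ_EXTENDED_COMMAND;
+ *     req.val = PGEXT_PIN_L2_TABLE;
+ *     HYPERVISOR_pt_update(&req, 1);    (hypothetical wrapper)
+ */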
+
diff --git a/xen/common/network.c b/xen/common/network.c
new file mode 100644
index 0000000000..f761ca9ba2
--- /dev/null
+++ b/xen/common/network.c
@@ -0,0 +1,475 @@
+/* network.c
+ *
+ * Network virtualization for Xen. Lower-level network interactions are in
+ * net/dev.c and in the drivers. This file contains routines to interact
+ * with the virtual interfaces (vifs) and the virtual firewall/router through
+ * the use of rules.
+ *
+ * Copyright (c) 2002, A K Warfield and K A Fraser
+ */
+
+#include <hypervisor-ifs/network.h>
+#include <xeno/sched.h>
+#include <xeno/errno.h>
+#include <xeno/init.h>
+#include <xeno/slab.h>
+#include <xeno/spinlock.h>
+#include <xeno/if_ether.h>
+#include <linux/skbuff.h>
+#include <xeno/netdevice.h>
+#include <xeno/in.h>
+#include <asm/domain_page.h>
+#include <asm/io.h>
+
+/* vif globals
+ * sys_vif_list is a lookup table for vifs, used in packet forwarding.
+ * It will be replaced later by something a little more flexible.
+ */
+
+int sys_vif_count; /* global vif count */
+net_vif_t *sys_vif_list[MAX_SYSTEM_VIFS]; /* global vif array */
+net_rule_ent_t *net_rule_list; /* global list of rules */
+kmem_cache_t *net_vif_cache;
+kmem_cache_t *net_rule_cache;
+static rwlock_t net_rule_lock = RW_LOCK_UNLOCKED; /* rule mutex */
+static rwlock_t sys_vif_lock = RW_LOCK_UNLOCKED; /* vif mutex */
+
+void print_net_rule_list();
+
+
+/* ----[ VIF Functions ]----------------------------------------------------*/
+
+/* create_net_vif - Create a new vif and append it to the specified domain.
+ *
+ * The domain is examined to determine how many vifs are currently
+ * allocated, and the newly allocated vif is appended. The vif is also
+ * added to the global list.
+ */
+
+net_vif_t *create_net_vif(int domain)
+{
+ net_vif_t *new_vif;
+ net_ring_t *new_ring;
+ net_shadow_ring_t *shadow_ring;
+ struct task_struct *dom_task;
+
+ if ( !(dom_task = find_domain_by_id(domain)) )
+ return NULL;
+
+ if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL )
+ return NULL;
+
+ new_ring = dom_task->net_ring_base + dom_task->num_net_vifs;
+ memset(new_ring, 0, sizeof(net_ring_t));
+
+ shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
+ if ( shadow_ring == NULL ) goto fail;
+
+ shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
+ * sizeof(rx_shadow_entry_t), GFP_KERNEL);
+ shadow_ring->tx_ring = kmalloc(TX_RING_SIZE
+ * sizeof(tx_shadow_entry_t), GFP_KERNEL);
+ if ( (shadow_ring->rx_ring == NULL) || (shadow_ring->tx_ring == NULL) )
+ goto fail;
+
+ shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
+ shadow_ring->tx_prod = shadow_ring->tx_cons = shadow_ring->tx_idx = 0;
+
+ /* Fill in the new vif struct. */
+
+ new_vif->net_ring = new_ring;
+ new_vif->shadow_ring = shadow_ring;
+
+ new_vif->domain = dom_task;
+
+ new_vif->list.next = NULL;
+
+ write_lock(&sys_vif_lock);
+ new_vif->id = sys_vif_count;
+ sys_vif_list[sys_vif_count++] = new_vif;
+ write_unlock(&sys_vif_lock);
+
+ dom_task->net_vif_list[dom_task->num_net_vifs] = new_vif;
+ dom_task->num_net_vifs++;
+
+ return new_vif;
+
+fail:
+ kmem_cache_free(net_vif_cache, new_vif);
+ if ( shadow_ring != NULL )
+ {
+ if ( shadow_ring->rx_ring ) kfree(shadow_ring->rx_ring);
+ if ( shadow_ring->tx_ring ) kfree(shadow_ring->tx_ring);
+ kfree(shadow_ring);
+ }
+ return NULL;
+}
+
+/* destroy_net_vif - Destroy the last vif in the given domain.
+ *
+ * There doesn't seem to be any reason (yet) to be able to axe an arbitrary
+ * vif, by vif id.
+ */
+
+void destroy_net_vif(struct task_struct *p)
+{
+ int i;
+
+ if ( p->num_net_vifs <= 0 ) return; /* nothing to do */
+
+ i = --p->num_net_vifs;
+
+ write_lock(&sys_vif_lock);
+ sys_vif_list[p->net_vif_list[i]->id] = NULL; /* system vif list is not gc'ed */
+ write_unlock(&sys_vif_lock);
+
+ kfree(p->net_vif_list[i]->shadow_ring->tx_ring);
+ kfree(p->net_vif_list[i]->shadow_ring->rx_ring);
+ kfree(p->net_vif_list[i]->shadow_ring);
+ kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
+}
+
+/* vif_query - Called from the proc file system to get a list of vifs
+ * assigned to a particular domain.
+ */
+
+void vif_query(vif_query_t *vq)
+{
+ struct task_struct *dom_task;
+ char buf[128];
+ int i;
+
+ if ( !(dom_task = find_domain_by_id(vq->domain)) ) return;
+
+ *buf = '\0';
+
+ for ( i = 0; i < dom_task->num_net_vifs; i++ )
+ sprintf(buf + strlen(buf), "%d\n", dom_task->net_vif_list[i]->id);
+
+ copy_to_user(vq->buf, buf, strlen(buf) + 1);
+}
+
+
+/* print_vif_list - Print the contents of the global vif table.
+ */
+
+void print_vif_list()
+{
+ int i;
+ net_vif_t *v;
+
+ printk("Currently, there are %d VIFs.\n", sys_vif_count);
+ for ( i = 0; i < sys_vif_count; i++ )
+ {
+ if ( (v = sys_vif_list[i]) == NULL ) continue; /* vif was destroyed */
+ printk("] VIF Entry %d(%d):\n", i, v->id);
+ printk(" > net_ring*: %p\n", v->net_ring);
+ printk(" > domain : %u\n", v->domain->domain);
+ }
+}
+
+/* ----[ Net Rule Functions ]-----------------------------------------------*/
+
+/* add_net_rule - Add a new network filter rule.
+ */
+
+int add_net_rule(net_rule_t *rule)
+{
+ net_rule_ent_t *new_ent;
+
+ if ( (new_ent = kmem_cache_alloc(net_rule_cache, GFP_KERNEL)) == NULL )
+ {
+ return -ENOMEM;
+ }
+
+ memcpy(&new_ent->r, rule, sizeof(net_rule_t));
+
+ write_lock(&net_rule_lock);
+ new_ent->next = net_rule_list;
+ net_rule_list = new_ent;
+ write_unlock(&net_rule_lock);
+
+ return 0;
+}
+
+/* delete_net_rule - Delete an existing network rule.
+ */
+
+int delete_net_rule(net_rule_t *rule)
+{
+ net_rule_ent_t *ent = net_rule_list, *prev = NULL;
+ while ( (ent) && ((memcmp(rule, &ent->r, sizeof(net_rule_t))) != 0) )
+ {
+ prev = ent;
+ ent = ent->next;
+ }
+
+ if (ent != NULL)
+ {
+ write_lock(&net_rule_lock);
+ if (prev != NULL)
+ {
+ prev->next = ent->next;
+ }
+ else
+ {
+ net_rule_list = ent->next;
+ }
+ kmem_cache_free(net_rule_cache, ent);
+ write_unlock(&net_rule_lock);
+ }
+ return 0;
+}
+
+/* add_default_net_rule - Set up the default network path (i.e. for dom0).
+ *
+ * This is a utility function to route all traffic with the specified
+ * IP address to the specified vif. It's used to set up domain zero.
+ */
+
+void add_default_net_rule(int vif_id, u32 ipaddr)
+{
+ net_rule_t new_rule;
+
+ /* Outbound rule. */
+ memset(&new_rule, 0, sizeof(net_rule_t));
+ new_rule.src_addr = ipaddr;
+ new_rule.src_addr_mask = 0xffffffff;
+ new_rule.src_interface = vif_id;
+ new_rule.dst_interface = VIF_PHYSICAL_INTERFACE;
+ new_rule.action = NETWORK_ACTION_ACCEPT;
+ new_rule.proto = NETWORK_PROTO_ANY;
+ add_net_rule(&new_rule);
+
+ /* Inbound rule. */
+ memset(&new_rule, 0, sizeof(net_rule_t));
+ new_rule.dst_addr = ipaddr;
+ new_rule.dst_addr_mask = 0xffffffff;
+ new_rule.src_interface = VIF_PHYSICAL_INTERFACE;
+ new_rule.dst_interface = vif_id;
+ new_rule.action = NETWORK_ACTION_ACCEPT;
+ new_rule.proto = NETWORK_PROTO_ANY;
+ add_net_rule(&new_rule);
+}
+
+/* print_net_rule - Print a single net rule.
+ */
+
+void print_net_rule(net_rule_t *r)
+{
+ printk("===] NET RULE:\n");
+ printk("=] src_addr : %lu\n", (unsigned long) r->src_addr);
+ printk("=] src_addr_mask : %lu\n", (unsigned long) r->src_addr_mask);
+ printk("=] dst_addr : %lu\n", (unsigned long) r->dst_addr);
+ printk("=] dst_addr_mask : %lu\n", (unsigned long) r->dst_addr_mask);
+ printk("=] src_port : %u\n", r->src_port);
+ printk("=] src_port_mask : %u\n", r->src_port_mask);
+ printk("=] dst_port : %u\n", r->dst_port);
+ printk("=] dst_port_mask : %u\n", r->dst_port_mask);
+ printk("=] dst_proto : %u\n", r->proto);
+ printk("=] src_interface : %d\n", r->src_interface);
+ printk("=] dst_interface : %d\n", r->dst_interface);
+ printk("=] action : %u\n", r->action);
+}
+
+/* print_net_rule_list - Print the global rule table.
+ */
+
+void print_net_rule_list()
+{
+ net_rule_ent_t *ent;
+ int count = 0;
+
+ read_lock(&net_rule_lock);
+
+ ent = net_rule_list;
+
+ while (ent)
+ {
+ print_net_rule(&ent->r);
+ ent = ent->next;
+ count++;
+ }
+ printk("\nTotal of %d rules.\n", count);
+
+ read_unlock(&net_rule_lock);
+}
+
+/* net_find_rule - Find the destination vif according to the current rules.
+ *
+ * Apply the rules to the given packet fields and return the vif id that
+ * the packet is bound for. If there is no match, VIF_DROP is returned.
+ */
+
+int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr,
+ u16 src_port, u16 dst_port, int src_vif)
+{
+ net_rule_ent_t *ent;
+ int dest = VIF_DROP;
+
+ read_lock(&net_rule_lock);
+
+ ent = net_rule_list;
+
+ while (ent)
+ {
+ if ( ((ent->r.src_interface == src_vif)
+ || (ent->r.src_interface == VIF_ANY_INTERFACE)) &&
+
+ (!((ent->r.src_addr ^ src_addr) & ent->r.src_addr_mask )) &&
+ (!((ent->r.dst_addr ^ dst_addr) & ent->r.dst_addr_mask )) &&
+ (!((ent->r.src_port ^ src_port) & ent->r.src_port_mask )) &&
+ (!((ent->r.dst_port ^ dst_port) & ent->r.dst_port_mask )) &&
+
+ ((ent->r.proto == NETWORK_PROTO_ANY) ||
+ ((ent->r.proto == NETWORK_PROTO_IP) &&
+ (nproto == (u8)ETH_P_IP)) ||
+ ((ent->r.proto == NETWORK_PROTO_ARP) &&
+ (nproto == (u8)ETH_P_ARP)) ||
+ ((ent->r.proto == NETWORK_PROTO_TCP) &&
+ (tproto == IPPROTO_TCP)) ||
+ ((ent->r.proto == NETWORK_PROTO_UDP) &&
+ (tproto == IPPROTO_UDP)))
+ )
+ {
+ break;
+ }
+ ent = ent->next;
+ }
+
+ if ( ent ) dest = ent->r.dst_interface;
+ read_unlock(&net_rule_lock);
+ return dest;
+}
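+
+/*
+ * Worked matching example (illustrative): a rule with src_addr = 10.0.0.1
+ * and src_addr_mask = 255.255.255.0 matches any source in 10.0.0.0/24,
+ * since ((rule.src_addr ^ src_addr) & rule.src_addr_mask) == 0 ignores
+ * the unmasked low octet; a mask of zero turns a field into a wildcard.
+ */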
+
+/* net_get_target_vif - Find the vif that the given sk_buff is bound for.
+ *
+ * This is intended to be the main interface to the VFR rules, where
+ * net_find_rule (above) is a private aspect of the current matching
+ * implementation. All in-hypervisor routing should use this function only
+ * to ensure that this can be rewritten later.
+ *
+ * Currently, network rules are stored in a global linked list. New rules are
+ * added to the front of this list, and (at present) the first matching rule
+ * determines the vif that a packet is sent to. This is obviously not ideal;
+ * it might be more advisable to have chains, or at least most-specific
+ * matching, and moreover routing latency increases linearly (for old rules)
+ * as new rules are added.
+ *
+ * net_get_target_vif examines the sk_buff and pulls out the relevant fields
+ * based on the packet type. It then calls net_find_rule to scan the rule
+ * list.
+ */
+
+#define net_get_target_vif(skb) __net_get_target_vif((skb)->data, (skb)->len, (skb)->src_vif)
+
+int __net_get_target_vif(u8 *data, unsigned int len, int src_vif)
+{
+ int target = VIF_DROP;
+ u8 *h_raw, *nh_raw;
+
+ if ( len < ETH_HLEN ) goto drop;
+
+ nh_raw = data + ETH_HLEN;
+ switch ( ntohs(*(unsigned short *)(data + 12)) )
+ {
+ case ETH_P_ARP:
+ if ( len < (ETH_HLEN + 28) ) goto drop;
+ target = net_find_rule((u8)ETH_P_ARP, 0, ntohl(*(u32 *)(nh_raw + 14)),
+ ntohl(*(u32 *)(nh_raw + 24)), 0, 0,
+ src_vif);
+ break;
+
+ case ETH_P_IP:
+ if ( len < (ETH_HLEN + 20) ) goto drop;
+ h_raw = nh_raw + ((*(unsigned char *)(nh_raw)) & 0x0f) * 4;
+
+ /* XXX For now, we ignore ports. */
+#if 0
+ target = net_find_rule((u8)ETH_P_IP, *(u8 *)(nh_raw + 9),
+ ntohl(*(u32 *)(nh_raw + 12)),
+ ntohl(*(u32 *)(nh_raw + 16)),
+ ntohs(*(u16 *)(h_raw)),
+ ntohs(*(u16 *)(h_raw + 2)),
+ src_vif);
+#else
+ target = net_find_rule((u8)ETH_P_IP, *(u8 *)(nh_raw + 9),
+ ntohl(*(u32 *)(nh_raw + 12)),
+ ntohl(*(u32 *)(nh_raw + 16)),
+ 0,
+ 0,
+ src_vif);
+#endif
+ }
+ return target;
+
+ drop:
+ return VIF_DROP;
+}
+
+/* ----[ Syscall Interface ]------------------------------------------------*/
+
+/*
+ * This is the hook function to handle guest-invoked traps requesting
+ * changes to the network system.
+ */
+
+long do_network_op(network_op_t *u_network_op)
+{
+ long ret=0;
+ network_op_t op;
+
+ if ( current->domain != 0 )
+ return -EPERM;
+
+ if ( copy_from_user(&op, u_network_op, sizeof(op)) )
+ return -EFAULT;
+ switch ( op.cmd )
+ {
+
+ case NETWORK_OP_ADDRULE:
+ {
+ add_net_rule(&op.u.net_rule);
+ }
+ break;
+
+ case NETWORK_OP_DELETERULE:
+ {
+ delete_net_rule(&op.u.net_rule);
+ }
+ break;
+
+ case NETWORK_OP_GETRULELIST:
+ {
+ /* This should eventually ship a rule list up to the VM to be
+ * printed in its procfs. For now, we just print the rules. */
+
+ print_net_rule_list();
+ }
+ break;
+
+ case NETWORK_OP_VIFQUERY:
+ {
+ vif_query(&op.u.vif_query);
+ }
+ break;
+
+ default:
+ ret = -ENOSYS;
+ }
+
+ return ret;
+}
+
+void __init net_init (void)
+{
+ sys_vif_count = 0;
+ memset(sys_vif_list, 0, sizeof(sys_vif_list));
+ net_rule_list = NULL;
+ net_vif_cache = kmem_cache_create("net_vif_cache", sizeof(net_vif_t),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ net_rule_cache = kmem_cache_create("net_rule_cache", sizeof(net_rule_ent_t),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+}
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
new file mode 100644
index 0000000000..1bfeed440f
--- /dev/null
+++ b/xen/common/page_alloc.c
@@ -0,0 +1,288 @@
+/******************************************************************************
+ * page_alloc.c
+ *
+ * Simple buddy allocator for the Xenoserver hypervisor.
+ *
+ * Copyright (c) 2002 K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <asm/page.h>
+#include <xeno/spinlock.h>
+#include <xeno/slab.h>
+
+static spinlock_t alloc_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*********************
+ * ALLOCATION BITMAP
+ * One bit per page of memory. Bit set => page is allocated.
+ */
+
+static unsigned long *alloc_bitmap;
+#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
+
+#define allocated_in_map(_pn) \
+(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))
+
+
+/*
+ * Hint regarding bitwise arithmetic in map_{alloc,free}:
+ * -(1<<n) sets all bits >= n.
+ * (1<<n)-1 sets all bits < n.
+ * Variable names in map_{alloc,free}:
+ * *_idx == Index into `alloc_bitmap' array.
+ * *_off == Bit offset within an element of the `alloc_bitmap' array.
+ */
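+
+/*
+ * Worked example (illustrative, assuming 32-bit words, i.e.
+ * PAGES_PER_MAPWORD == 32): map_alloc(34, 5) marks pages 34..38, which
+ * are bits 2..6 of alloc_bitmap[1]:
+ *     curr_idx == end_idx == 1, start_off = 2, end_off = 7
+ *     ((1<<7)-1) & -(1<<2)  ==  0x0000007f & 0xfffffffc  ==  0x0000007c
+ */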
+
+static void map_alloc(unsigned long first_page, unsigned long nr_pages)
+{
+ unsigned long start_off, end_off, curr_idx, end_idx;
+
+ curr_idx = first_page / PAGES_PER_MAPWORD;
+ start_off = first_page & (PAGES_PER_MAPWORD-1);
+ end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+ end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+ if ( curr_idx == end_idx )
+ {
+ alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
+ }
+ else
+ {
+ alloc_bitmap[curr_idx] |= -(1<<start_off);
+ while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
+ alloc_bitmap[curr_idx] |= (1<<end_off)-1;
+ }
+}
+
+
+static void map_free(unsigned long first_page, unsigned long nr_pages)
+{
+ unsigned long start_off, end_off, curr_idx, end_idx;
+
+ curr_idx = first_page / PAGES_PER_MAPWORD;
+ start_off = first_page & (PAGES_PER_MAPWORD-1);
+ end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+ end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+ if ( curr_idx == end_idx )
+ {
+ alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
+ }
+ else
+ {
+ alloc_bitmap[curr_idx] &= (1<<start_off)-1;
+ while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
+ alloc_bitmap[curr_idx] &= -(1<<end_off);
+ }
+}
+
+
+
+/*************************
+ * BINARY BUDDY ALLOCATOR
+ */
+
+typedef struct chunk_head_st chunk_head_t;
+typedef struct chunk_tail_st chunk_tail_t;
+
+struct chunk_head_st {
+ chunk_head_t *next;
+ chunk_head_t **pprev;
+ int level;
+};
+
+struct chunk_tail_st {
+ int level;
+};
+
+/* Linked lists of free chunks of different powers-of-two in size. */
+#define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT)
+static chunk_head_t *free_head[FREELIST_SIZE];
+static chunk_head_t free_tail[FREELIST_SIZE];
+#define FREELIST_EMPTY(_l) ((_l)->next == NULL)
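+
+/*
+ * Sizing note (illustrative, assuming a 32-bit build with 4kB pages):
+ * FREELIST_SIZE == 32 - 12 == 20, i.e. one free list per chunk size
+ * from 2^12 bytes (a single page) up to 2^31 bytes.
+ */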
+
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+
+/* Initialise allocator, placing addresses [@min,@max] in free pool. */
+void __init init_page_allocator(unsigned long min, unsigned long max)
+{
+ int i;
+ unsigned long range, bitmap_size;
+ chunk_head_t *ch;
+ chunk_tail_t *ct;
+
+ for ( i = 0; i < FREELIST_SIZE; i++ )
+ {
+ free_head[i] = &free_tail[i];
+ free_tail[i].pprev = &free_head[i];
+ free_tail[i].next = NULL;
+ }
+
+ min = round_pgup (min);
+ max = round_pgdown(max);
+
+ /* Allocate space for the allocation bitmap. */
+ bitmap_size = (max+1) >> (PAGE_SHIFT+3);
+ bitmap_size = round_pgup(bitmap_size);
+ alloc_bitmap = (unsigned long *)__va(min);
+ min += bitmap_size;
+ range = max - min;
+
+ /* All allocated by default. */
+ memset(alloc_bitmap, ~0, bitmap_size);
+ /* Free up the memory we've been given to play with. */
+ map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT);
+
+ /* The buddy lists are addressed in high memory. */
+ min += PAGE_OFFSET;
+ max += PAGE_OFFSET;
+
+ while ( range != 0 )
+ {
+ /*
+ * Next chunk is limited by alignment of min, but also
+ * must not be bigger than remaining range.
+ */
+ for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
+ if ( min & (1<<i) ) break;
+
+ ch = (chunk_head_t *)min;
+ min += (1<<i);
+ range -= (1<<i);
+ ct = (chunk_tail_t *)min-1;
+ i -= PAGE_SHIFT;
+ ch->level = i;
+ ch->next = free_head[i];
+ ch->pprev = &free_head[i];
+ ch->next->pprev = &ch->next;
+ free_head[i] = ch;
+ ct->level = i;
+ }
+}
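+
+/*
+ * Worked example of the carving loop above (illustrative): a 52kB free
+ * range starting at min = 0x3000 is carved into three chunks,
+ *      4kB at 0x3000   (limited by the alignment of min)
+ *     16kB at 0x4000
+ *     32kB at 0x8000
+ * each being the largest power of two that is naturally aligned at min
+ * and still fits in the remaining range.
+ */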
+
+
+/* Allocate 2^@order contiguous pages. */
+unsigned long __get_free_pages(int mask, int order)
+{
+ int i, attempts = 0;
+ chunk_head_t *alloc_ch, *spare_ch;
+ chunk_tail_t *spare_ct;
+ unsigned long flags;
+
+retry:
+ spin_lock_irqsave(&alloc_lock, flags);
+
+ /* Find smallest order which can satisfy the request. */
+ for ( i = order; i < FREELIST_SIZE; i++ ) {
+ if ( !FREELIST_EMPTY(free_head[i]) )
+ break;
+ }
+
+ if ( i == FREELIST_SIZE ) goto no_memory;
+
+ /* Unlink a chunk. */
+ alloc_ch = free_head[i];
+ free_head[i] = alloc_ch->next;
+ alloc_ch->next->pprev = alloc_ch->pprev;
+
+ /* We may have to break the chunk a number of times. */
+ while ( i != order )
+ {
+ /* Split into two equal parts. */
+ i--;
+ spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
+ spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;
+
+ /* Create new header for spare chunk. */
+ spare_ch->level = i;
+ spare_ch->next = free_head[i];
+ spare_ch->pprev = &free_head[i];
+ spare_ct->level = i;
+
+ /* Link in the spare chunk. */
+ spare_ch->next->pprev = &spare_ch->next;
+ free_head[i] = spare_ch;
+ }
+
+ map_alloc(__pa(alloc_ch)>>PAGE_SHIFT, 1<<order);
+
+ spin_unlock_irqrestore(&alloc_lock, flags);
+
+ return((unsigned long)alloc_ch);
+
+ no_memory:
+ if ( attempts++ < 8 )
+ {
+ spin_unlock_irqrestore(&alloc_lock, flags);
+ kmem_cache_reap(0);
+ goto retry;
+ }
+
+ printk("Cannot handle page request order %d!\n", order);
+ dump_slabinfo();
+
+ return 0;
+}
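+
+/*
+ * Usage sketch (illustrative; note the mask argument is currently unused
+ * by this allocator):
+ *
+ *     unsigned long p = __get_free_pages(0, 2);   2^2 contiguous pages
+ *     if ( p != 0 )
+ *         __free_pages(p, 2);
+ */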
+
+
+/* Free 2^@order pages at location @p. */
+void __free_pages(unsigned long p, int order)
+{
+ unsigned long size = 1 << (order + PAGE_SHIFT);
+ chunk_head_t *ch;
+ chunk_tail_t *ct;
+ unsigned long flags;
+ unsigned long pagenr = __pa(p) >> PAGE_SHIFT;
+
+ spin_lock_irqsave(&alloc_lock, flags);
+
+ map_free(pagenr, 1<<order);
+
+ /* Merge chunks as far as possible. */
+ for ( ; ; )
+ {
+ if ( (p & size) )
+ {
+ /* Merge with predecessor block? */
+ if ( allocated_in_map(pagenr-1) ) break;
+ ct = (chunk_tail_t *)p - 1;
+ if ( ct->level != order ) break;
+ ch = (chunk_head_t *)(p - size);
+ p -= size;
+ }
+ else
+ {
+ /* Merge with successor block? */
+ if ( allocated_in_map(pagenr+(1<<order)) ) break;
+ ch = (chunk_head_t *)(p + size);
+ if ( ch->level != order ) break;
+ }
+
+ /* Okay, unlink the neighbour. */
+ *ch->pprev = ch->next;
+ ch->next->pprev = ch->pprev;
+
+ order++;
+ size <<= 1;
+ }
+
+ /* Okay, add the final chunk to the appropriate free list. */
+ ch = (chunk_head_t *)p;
+ ct = (chunk_tail_t *)(p+size)-1;
+ ct->level = order;
+ ch->level = order;
+ ch->pprev = &free_head[order];
+ ch->next = free_head[order];
+ ch->next->pprev = &ch->next;
+ free_head[order] = ch;
+
+ spin_unlock_irqrestore(&alloc_lock, flags);
+}
diff --git a/xen/common/perfc.c b/xen/common/perfc.c
new file mode 100644
index 0000000000..55554eba70
--- /dev/null
+++ b/xen/common/perfc.c
@@ -0,0 +1,81 @@
+/*
+ * xen performance counters
+ */
+
+#include <xeno/perfc.h>
+#include <xeno/keyhandler.h>
+
+#define PERFCOUNTER( var, name ) "[0]"name"\0",
+#define PERFCOUNTER_ARRAY( var, name, size ) "["#size"]"name"\0",
+
+char* perfc_name[] = {
+#include <xeno/perfc_defn.h>
+};
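+
+/*
+ * Illustrative note: each entry in perfc_defn.h expands to a string whose
+ * prefix encodes the element count, e.g. a hypothetical
+ *     PERFCOUNTER_ARRAY( net_rx, "net rx", 4 )
+ * becomes "[4]net rx". The sscanf("[%d]%n") calls below recover both the
+ * count and the offset of the name proper.
+ */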
+
+struct perfcounter_t perfcounters;
+
+void __perfc_print (unsigned long counter[], int offset)
+{
+ int loop;
+ int total_size = 0;
+ int element_size = 0;
+ int num = 0;
+
+ for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++)
+ {
+ /* '%n' leaves the offset of the name proper in 'num'. */
+ sscanf (perfc_name[loop], "[%d]%n", &element_size, &num);
+ total_size += element_size == 0 ? 1 : element_size;
+ if (total_size > offset) break;
+ }
+ if (loop == sizeof(perfc_name) / sizeof(char *))
+ {
+ printf ("error: couldn't find variable\n");
+ return;
+ }
+ if (element_size == 0) /* single counter */
+ {
+ printf ("%10ld 0x%08lx %s\n", counter[0], counter[0],
+ perfc_name[loop] + num);
+ }
+ else /* show entire array */
+ {
+ int idx;
+ /* Use a separate index: 'loop' still selects the name. */
+ for (idx = 0; idx < element_size; idx++)
+ {
+ printf ("%10ld 0x%08lx %s:%d\n",
+ counter[idx], counter[idx],
+ perfc_name[loop] + num, idx);
+ }
+ }
+ return;
+}
+
+void perfc_printall (u_char key, void *dev_id, struct pt_regs *regs)
+{
+ int loop, idx;
+ int element_size;
+ int num;
+ unsigned long *counters = (unsigned long *)&perfcounters;
+
+ printf ("xen performance counters\n");
+ for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++)
+ {
+ sscanf (perfc_name[loop], "[%d]%n", &element_size, &num);
+
+ for (idx = 0; idx < (element_size ? element_size : 1); idx++)
+ {
+ if (element_size)
+ {
+ printf ("%10ld 0x%08lx %s:%d\n",
+ *counters, *counters, perfc_name[loop] + num, idx);
+ }
+ else
+ {
+ printf ("%10ld 0x%08lx %s\n",
+ *counters, *counters, perfc_name[loop] + num);
+ }
+ counters++;
+ }
+ }
+
+ return;
+}
diff --git a/xen/common/resource.c b/xen/common/resource.c
new file mode 100644
index 0000000000..406fb256a7
--- /dev/null
+++ b/xen/common/resource.c
@@ -0,0 +1,332 @@
+/*
+ * linux/kernel/resource.c
+ *
+ * Copyright (C) 1999 Linus Torvalds
+ * Copyright (C) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * Arbitrary resource management.
+ */
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+
+struct resource ioport_resource = { "PCI IO", 0x0000, IO_SPACE_LIMIT, IORESOURCE_IO };
+struct resource iomem_resource = { "PCI mem", 0x00000000, 0xffffffff, IORESOURCE_MEM };
+
+static rwlock_t resource_lock = RW_LOCK_UNLOCKED;
+
+/*
+ * This generates reports for /proc/ioports and /proc/iomem
+ */
+static char * do_resource_list(struct resource *entry, const char *fmt, int offset, char *buf, char *end)
+{
+ if (offset < 0)
+ offset = 0;
+
+ while (entry) {
+ const char *name = entry->name;
+ unsigned long from, to;
+
+ if ((int) (end-buf) < 80)
+ return buf;
+
+ from = entry->start;
+ to = entry->end;
+ if (!name)
+ name = "<BAD>";
+
+ buf += sprintf(buf, fmt + offset, from, to, name);
+ if (entry->child)
+ buf = do_resource_list(entry->child, fmt, offset-2, buf, end);
+ entry = entry->sibling;
+ }
+
+ return buf;
+}
+
+int get_resource_list(struct resource *root, char *buf, int size)
+{
+ char *fmt;
+ int retval;
+
+ fmt = " %08lx-%08lx : %s\n";
+ if (root->end < 0x10000)
+ fmt = " %04lx-%04lx : %s\n";
+ read_lock(&resource_lock);
+ retval = do_resource_list(root->child, fmt, 8, buf, buf + size) - buf;
+ read_unlock(&resource_lock);
+ return retval;
+}
+
+/* Return the conflict entry if you can't request it */
+static struct resource * __request_resource(struct resource *root, struct resource *new)
+{
+ unsigned long start = new->start;
+ unsigned long end = new->end;
+ struct resource *tmp, **p;
+
+ if (end < start)
+ return root;
+ if (start < root->start)
+ return root;
+ if (end > root->end)
+ return root;
+ p = &root->child;
+ for (;;) {
+ tmp = *p;
+ if (!tmp || tmp->start > end) {
+ new->sibling = tmp;
+ *p = new;
+ new->parent = root;
+ return NULL;
+ }
+ p = &tmp->sibling;
+ if (tmp->end < start)
+ continue;
+ return tmp;
+ }
+}
+
+static int __release_resource(struct resource *old)
+{
+ struct resource *tmp, **p;
+
+ p = &old->parent->child;
+ for (;;) {
+ tmp = *p;
+ if (!tmp)
+ break;
+ if (tmp == old) {
+ *p = tmp->sibling;
+ old->parent = NULL;
+ return 0;
+ }
+ p = &tmp->sibling;
+ }
+ return -EINVAL;
+}
+
+int request_resource(struct resource *root, struct resource *new)
+{
+ struct resource *conflict;
+
+ write_lock(&resource_lock);
+ conflict = __request_resource(root, new);
+ write_unlock(&resource_lock);
+ return conflict ? -EBUSY : 0;
+}
+
+int release_resource(struct resource *old)
+{
+ int retval;
+
+ write_lock(&resource_lock);
+ retval = __release_resource(old);
+ write_unlock(&resource_lock);
+ return retval;
+}
+
+int check_resource(struct resource *root, unsigned long start, unsigned long len)
+{
+ struct resource *conflict, tmp;
+
+ tmp.start = start;
+ tmp.end = start + len - 1;
+ write_lock(&resource_lock);
+ conflict = __request_resource(root, &tmp);
+ if (!conflict)
+ __release_resource(&tmp);
+ write_unlock(&resource_lock);
+ return conflict ? -EBUSY : 0;
+}
+
+/*
+ * Find empty slot in the resource tree given range and alignment.
+ */
+static int find_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align,
+ void (*alignf)(void *, struct resource *,
+ unsigned long, unsigned long),
+ void *alignf_data)
+{
+ struct resource *this = root->child;
+
+ new->start = root->start;
+ for(;;) {
+ if (this)
+ new->end = this->start;
+ else
+ new->end = root->end;
+ if (new->start < min)
+ new->start = min;
+ if (new->end > max)
+ new->end = max;
+ new->start = (new->start + align - 1) & ~(align - 1);
+ if (alignf)
+ alignf(alignf_data, new, size, align);
+ if (new->start < new->end && new->end - new->start + 1 >= size) {
+ new->end = new->start + size - 1;
+ return 0;
+ }
+ if (!this)
+ break;
+ new->start = this->end + 1;
+ this = this->sibling;
+ }
+ return -EBUSY;
+}
+
+/*
+ * Allocate empty slot in the resource tree given range and alignment.
+ */
+int allocate_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align,
+ void (*alignf)(void *, struct resource *,
+ unsigned long, unsigned long),
+ void *alignf_data)
+{
+ int err;
+
+ write_lock(&resource_lock);
+ err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
+ if (err >= 0 && __request_resource(root, new))
+ err = -EBUSY;
+ write_unlock(&resource_lock);
+ return err;
+}
+
+/*
+ * This is compatibility stuff for IO resources.
+ *
+ * Note how this, unlike the above, knows about
+ * the IO flag meanings (busy etc).
+ *
+ * Request-region creates a new busy region.
+ *
+ * Check-region returns non-zero if the area is already busy
+ *
+ * Release-region releases a matching busy region.
+ */
+struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
+{
+ struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
+
+ if (res) {
+ memset(res, 0, sizeof(*res));
+ res->name = name;
+ res->start = start;
+ res->end = start + n - 1;
+ res->flags = IORESOURCE_BUSY;
+
+ write_lock(&resource_lock);
+
+ for (;;) {
+ struct resource *conflict;
+
+ conflict = __request_resource(parent, res);
+ if (!conflict)
+ break;
+ if (conflict != parent) {
+ parent = conflict;
+ if (!(conflict->flags & IORESOURCE_BUSY))
+ continue;
+ }
+
+ /* Uhhuh, that didn't work out.. */
+ kfree(res);
+ res = NULL;
+ break;
+ }
+ write_unlock(&resource_lock);
+ }
+ return res;
+}
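+
+/*
+ * Usage sketch (illustrative): claim the eight I/O ports of the first
+ * serial UART, then release them again.
+ *
+ *     struct resource *r = __request_region(&ioport_resource, 0x3f8, 8,
+ *                                           "serial");
+ *     if ( r != NULL )
+ *         __release_region(&ioport_resource, 0x3f8, 8);
+ */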
+
+int __check_region(struct resource *parent, unsigned long start, unsigned long n)
+{
+ struct resource * res;
+
+ res = __request_region(parent, start, n, "check-region");
+ if (!res)
+ return -EBUSY;
+
+ release_resource(res);
+ kfree(res);
+ return 0;
+}
+
+void __release_region(struct resource *parent, unsigned long start, unsigned long n)
+{
+ struct resource **p;
+ unsigned long end;
+
+ p = &parent->child;
+ end = start + n - 1;
+
+ for (;;) {
+ struct resource *res = *p;
+
+ if (!res)
+ break;
+ if (res->start <= start && res->end >= end) {
+ if (!(res->flags & IORESOURCE_BUSY)) {
+ p = &res->child;
+ continue;
+ }
+ if (res->start != start || res->end != end)
+ break;
+ *p = res->sibling;
+ kfree(res);
+ return;
+ }
+ p = &res->sibling;
+ }
+ printk("Trying to free nonexistent resource <%08lx-%08lx>\n", start, end);
+}
+
+
+#if 0
+/*
+ * Called from init/main.c to reserve IO ports.
+ */
+#define MAXRESERVE 4
+static int __init reserve_setup(char *str)
+{
+ static int reserved = 0;
+ static struct resource reserve[MAXRESERVE];
+
+ for (;;) {
+ int io_start, io_num;
+ int x = reserved;
+
+ if (get_option (&str, &io_start) != 2)
+ break;
+ if (get_option (&str, &io_num) == 0)
+ break;
+ if (x < MAXRESERVE) {
+ struct resource *res = reserve + x;
+ res->name = "reserved";
+ res->start = io_start;
+ res->end = io_start + io_num - 1;
+ res->flags = IORESOURCE_BUSY;
+ res->child = NULL;
+ if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
+ reserved = x+1;
+ }
+ }
+ return 1;
+}
+
+__setup("reserve=", reserve_setup);
+#endif
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
new file mode 100644
index 0000000000..787b43d900
--- /dev/null
+++ b/xen/common/schedule.c
@@ -0,0 +1,371 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: schedule.c
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: CPU scheduling
+ * partially moved from domain.c
+ *
+ ****************************************************************************
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+#include <xeno/delay.h>
+#include <xeno/event.h>
+#include <xeno/time.h>
+#include <xeno/ac_timer.h>
+#include <xeno/interrupt.h>
+
+#undef SCHEDULER_TRACE
+#ifdef SCHEDULER_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+/*
+ * per CPU data for the scheduler.
+ */
+typedef struct schedule_data_st
+{
+ spinlock_t lock;
+ struct list_head runqueue;
+ struct task_struct *prev, *curr;
+} __cacheline_aligned schedule_data_t;
+schedule_data_t schedule_data[NR_CPUS];
+
+static __cacheline_aligned struct ac_timer s_timer[NR_CPUS];
+
+/*
+ * Some convenience functions
+ */
+
+static inline void __add_to_runqueue(struct task_struct * p)
+{
+ list_add(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __move_last_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __move_first_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ list_add(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __del_from_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ p->run_list.next = NULL;
+}
+
+static inline int __task_on_runqueue(struct task_struct *p)
+{
+ return (p->run_list.next != NULL);
+}
+
+
+/*
+ * Add a new domain to the scheduler
+ */
+void sched_add_domain(struct task_struct *p)
+{
+ p->state = TASK_UNINTERRUPTIBLE;
+}
+
+/*
+ * Remove a domain from the scheduler
+ */
+void sched_rem_domain(struct task_struct *p)
+{
+ p->state = TASK_DYING;
+}
+
+
+/*
+ * wake up a domain which had been sleeping
+ */
+int wake_up(struct task_struct *p)
+{
+ unsigned long flags;
+ int ret = 0;
+ spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
+ if ( __task_on_runqueue(p) ) goto out;
+ p->state = TASK_RUNNING;
+ __add_to_runqueue(p);
+ ret = 1;
+
+ out:
+ spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
+ return ret;
+}
+
+static void process_timeout(unsigned long __data)
+{
+ struct task_struct * p = (struct task_struct *) __data;
+ wake_up(p);
+}
+
+long schedule_timeout(long timeout)
+{
+ struct timer_list timer;
+ unsigned long expire;
+
+ switch (timeout)
+ {
+ case MAX_SCHEDULE_TIMEOUT:
+ /*
+ * This special case is purely for the caller's convenience.
+ * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from one of the
+ * negative values, but I'd like to return a valid offset (>=0) to allow
+ * the caller to do everything it wants with the retval.
+ */
+ schedule();
+ goto out;
+ default:
+ /*
+ * Another bit of paranoia. Note that the retval will be 0 since no
+ * piece of kernel is supposed to check for a negative retval of
+ * schedule_timeout() (since it should never happen anyway). You just
+ * have the printk() that will tell you if something has gone wrong,
+ * and where.
+ */
+ if (timeout < 0)
+ {
+ printk(KERN_ERR "schedule_timeout: wrong timeout "
+ "value %lx from %p\n", timeout,
+ __builtin_return_address(0));
+ current->state = TASK_RUNNING;
+ goto out;
+ }
+ }
+
+ expire = timeout + jiffies;
+
+ init_timer(&timer);
+ timer.expires = expire;
+ timer.data = (unsigned long) current;
+ timer.function = process_timeout;
+
+ add_timer(&timer);
+ schedule();
+ del_timer_sync(&timer);
+
+ timeout = expire - jiffies;
+
+ out:
+ return timeout < 0 ? 0 : timeout;
+}
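+
+/*
+ * Usage sketch (illustrative, relying on the usual Linux convention that
+ * the caller sets its state before sleeping): sleep for ~100 ticks.
+ *
+ *     current->state = TASK_INTERRUPTIBLE;
+ *     remaining = schedule_timeout(100);
+ */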
+
+/* RN: XXX turn this into do_halt() */
+/*
+ * yield the current process
+ */
+long do_sched_op(void)
+{
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ return 0;
+}
+
+
+void reschedule(struct task_struct *p)
+{
+ int cpu = p->processor;
+ struct task_struct *curr;
+ unsigned long flags;
+
+ if (p->has_cpu)
+ return;
+
+ spin_lock_irqsave(&schedule_data[cpu].lock, flags);
+ curr = schedule_data[cpu].curr;
+ if (is_idle_task(curr)) {
+ set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
+ spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
+#ifdef CONFIG_SMP
+ if (cpu != smp_processor_id())
+ smp_send_event_check_cpu(cpu);
+#endif
+ } else {
+ spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
+ }
+}
+
+
+/*
+ * Pick the next domain to run
+ */
+
+asmlinkage void schedule(void)
+{
+ struct task_struct *prev, *next, *p;
+ struct list_head *tmp;
+ int this_cpu;
+
+ need_resched_back:
+ prev = current;
+ this_cpu = prev->processor;
+
+ spin_lock_irq(&schedule_data[this_cpu].lock);
+
+ ASSERT(!in_interrupt());
+ ASSERT(__task_on_runqueue(prev));
+
+ __move_last_runqueue(prev);
+
+ switch ( prev->state )
+ {
+ case TASK_INTERRUPTIBLE:
+ if ( signal_pending(prev) )
+ {
+ prev->state = TASK_RUNNING;
+ break;
+ }
+ default:
+ __del_from_runqueue(prev);
+ case TASK_RUNNING:;
+ }
+ clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
+
+ next = NULL;
+ list_for_each(tmp, &schedule_data[this_cpu].runqueue) {
+ p = list_entry(tmp, struct task_struct, run_list);
+ next = p;
+ if ( !is_idle_task(next) ) break;
+ }
+
+ prev->has_cpu = 0;
+ next->has_cpu = 1;
+
+ schedule_data[this_cpu].prev = prev;
+ schedule_data[this_cpu].curr = next;
+
+ spin_unlock_irq(&schedule_data[this_cpu].lock);
+
+ if ( unlikely(prev == next) )
+ {
+ /* We won't go through the normal tail, so do this by hand */
+ prev->policy &= ~SCHED_YIELD;
+ goto same_process;
+ }
+
+ prepare_to_switch();
+ switch_to(prev, next);
+ prev = schedule_data[this_cpu].prev;
+
+ prev->policy &= ~SCHED_YIELD;
+ if ( prev->state == TASK_DYING ) release_task(prev);
+
+ same_process:
+ update_dom_time(current->shared_info);
+
+ if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) )
+ goto need_resched_back;
+ return;
+}
+
+/*
+ * The scheduling timer.
+ */
+static __cacheline_aligned int count[NR_CPUS];
+static void sched_timer(unsigned long foo)
+{
+ int cpu = smp_processor_id();
+ struct task_struct *curr = schedule_data[cpu].curr;
+ s_time_t now;
+ int res;
+
+ /* Reschedule every 5 ticks. */
+ if (count[cpu] >= 5) {
+ set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
+ count[cpu] = 0;
+ }
+ count[cpu]++;
+
+ /*
+ * Deliver virtual timer interrupts to domains if we are CPU 0. XXX RN:
+ * We don't have a per-CPU list of domains yet, otherwise we would use
+ * that. Plus, this should be removed anyway once domains "know" about
+ * virtual time and timeouts. But it's better here than where it was
+ * before.
+ */
+ if (cpu == 0) {
+ struct task_struct *p;
+ unsigned long cpu_mask = 0;
+
+ /* send virtual timer interrupt */
+ read_lock(&tasklist_lock);
+ p = &idle0_task;
+ do {
+ if ( is_idle_task(p) ) continue;
+ cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
+ }
+ while ( (p = p->next_task) != &idle0_task );
+ read_unlock(&tasklist_lock);
+ guest_event_notify(cpu_mask);
+ }
+
+ again:
+ now = NOW();
+ s_timer[cpu].expires = now + MILLISECS(10);
+ res=add_ac_timer(&s_timer[cpu]);
+
+ TRC(printk("SCHED[%02d] timer(): now=0x%08X%08X timo=0x%08X%08X\n",
+ cpu, (u32)(now>>32), (u32)now,
+ (u32)(s_timer[cpu].expires>>32), (u32)s_timer[cpu].expires));
+ if ( res == 1 )
+ goto again;
+}
+
+
+/*
+ * Initialise the data structures
+ */
+void __init scheduler_init(void)
+{
+ int i;
+
+ printk("Initialising schedulers\n");
+
+ for ( i = 0; i < NR_CPUS; i++ )
+ {
+ INIT_LIST_HEAD(&schedule_data[i].runqueue);
+ spin_lock_init(&schedule_data[i].lock);
+ schedule_data[i].prev = &idle0_task;
+ schedule_data[i].curr = &idle0_task;
+
+ /* a timer for each CPU */
+ init_ac_timer(&s_timer[i]);
+ s_timer[i].function = &sched_timer;
+ }
+}
+
+/*
+ * Start a scheduler for each CPU
+ * This has to be done *after* the timers, e.g., APICs, have been initialised
+ */
+void schedulers_start(void)
+{
+ printk("Start schedulers\n");
+ __cli();
+ sched_timer(0);
+ smp_call_function((void *)sched_timer, NULL, 1, 1);
+ __sti();
+}
diff --git a/xen/common/slab.c b/xen/common/slab.c
new file mode 100644
index 0000000000..3452e89aa7
--- /dev/null
+++ b/xen/common/slab.c
@@ -0,0 +1,1945 @@
+/*
+ * linux/mm/slab.c
+ * Written by Mark Hemment, 1996/97.
+ * (markhe@nextd.demon.co.uk)
+ *
+ * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
+ *
+ * Major cleanup, different bufctl logic, per-cpu arrays
+ * (c) 2000 Manfred Spraul
+ *
+ * An implementation of the Slab Allocator as described in outline in;
+ * UNIX Internals: The New Frontiers by Uresh Vahalia
+ * Pub: Prentice Hall ISBN 0-13-101908-2
+ * or with a little more detail in;
+ * The Slab Allocator: An Object-Caching Kernel Memory Allocator
+ * Jeff Bonwick (Sun Microsystems).
+ * Presented at: USENIX Summer 1994 Technical Conference
+ *
+ *
+ * The memory is organized in caches, one cache for each object type.
+ * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
+ * Each cache consists of many slabs (they are small (usually one
+ * page long) and always contiguous), and each slab contains multiple
+ * initialized objects.
+ *
+ * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
+ * normal). If you need a special memory type, then you must create a new
+ * cache for that memory type.
+ *
+ * In order to reduce fragmentation, the slabs are sorted in 3 groups:
+ * full slabs with 0 free objects
+ * partial slabs
+ * empty slabs with no allocated objects
+ *
+ * If partial slabs exist, then new allocations come from these slabs,
+ * otherwise from empty slabs or new slabs are allocated.
+ *
+ * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
+ * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
+ *
+ * On SMP systems, each cache has a short per-cpu head array, most allocs
+ * and frees go into that array, and if that array overflows, then 1/2
+ * of the entries in the array are given back into the global cache.
+ * This reduces the number of spinlock operations.
+ *
+ * The c_cpuarray may not be read with enabled local interrupts.
+ *
+ * SMP synchronization:
+ * constructors and destructors are called without any locking.
+ * Several members in kmem_cache_t and slab_t never change, they
+ * are accessed without any locking.
+ * The per-cpu arrays are never accessed from the wrong cpu, no locking.
+ * The non-constant members are protected with a per-cache irq spinlock.
+ *
+ * Further notes from the original documentation:
+ *
+ * 11 April '97. Started multi-threading - markhe
+ * The global cache-chain is protected by the semaphore 'cache_chain_sem'.
+ * The sem is only needed when accessing/extending the cache-chain, which
+ * can never happen inside an interrupt (kmem_cache_create(),
+ * kmem_cache_shrink() and kmem_cache_reap()).
+ *
+ * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
+ * may be sleeping and therefore not holding the semaphore/lock), the
+ * growing field is used. This also prevents reaping from a cache.
+ *
+ * At present, each engine can be growing a cache. This should be blocked.
+ *
+ */
+
+/*
+ * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
+ * SLAB_RED_ZONE & SLAB_POISON.
+ * 0 for faster, smaller code (especially in the critical paths).
+ *
+ * STATS - 1 to collect stats for /proc/slabinfo.
+ * 0 for faster, smaller code (especially in the critical paths).
+ *
+ * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/slab.h>
+#include <xeno/list.h>
+#include <xeno/spinlock.h>
+#include <xeno/errno.h>
+#include <xeno/smp.h>
+#include <xeno/sched.h>
+
+
+#ifdef CONFIG_DEBUG_SLAB
+#define DEBUG 1
+#define STATS 1
+#define FORCED_DEBUG 1
+#else
+#define DEBUG 0
+#define STATS 0
+#define FORCED_DEBUG 0
+#endif
+
+/*
+ * Parameters for kmem_cache_reap
+ */
+#define REAP_SCANLEN 10
+#define REAP_PERFECT 10
+
+/* Shouldn't this be in a header file somewhere? */
+#define BYTES_PER_WORD sizeof(void *)
+
+/* Legal flag mask for kmem_cache_create(). */
+#if DEBUG
+# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
+ SLAB_POISON | SLAB_HWCACHE_ALIGN | \
+ SLAB_NO_REAP | SLAB_CACHE_DMA)
+#else
+# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
+#endif
+
+/*
+ * kmem_bufctl_t:
+ *
+ * Bufctl's are used for linking objs within a slab
+ * linked offsets.
+ *
+ * This implementation relies on "struct page" for locating the cache &
+ * slab an object belongs to.
+ * This allows the bufctl structure to be small (one int), but limits
+ * the number of objects a slab (not a cache) can contain when off-slab
+ * bufctls are used. The limit is the size of the largest general cache
+ * that does not use off-slab slabs.
+ * For 32-bit archs with 4 kB pages, this is 56.
+ * This is not serious, as it is only for large objects, when it is unwise
+ * to have too many per slab.
+ * Note: This limit can be raised by introducing a general cache whose size
+ * is less than 512 (PAGE_SIZE<<3), but greater than 256.
+ */
+
+#define BUFCTL_END 0xffffFFFF
+#define SLAB_LIMIT 0xffffFFFE
+typedef unsigned int kmem_bufctl_t;
+
+/* Max number of objs-per-slab for caches which use off-slab slabs.
+ * Needed to avoid a possible looping condition in kmem_cache_grow().
+ */
+static unsigned long offslab_limit;
+
+/*
+ * slab_t
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from a general cache.
+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
+ */
+typedef struct slab_s {
+ struct list_head list;
+ unsigned long colouroff;
+ void *s_mem; /* including colour offset */
+ unsigned int inuse; /* num of objs active in slab */
+ kmem_bufctl_t free;
+} slab_t;
+
+#define slab_bufctl(slabp) \
+ ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
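+
+/*
+ * Illustrative layout note for the on-slab case: the slab_t sits at the
+ * start of the slab's memory, immediately followed by one kmem_bufctl_t
+ * per object, with the objects themselves at s_mem. slab_bufctl(slabp)[i]
+ * holds the index of the next free object, forming a free list that
+ * starts at slabp->free and is terminated by BUFCTL_END.
+ */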
+
+/*
+ * cpucache_t
+ *
+ * Per cpu structures
+ * The limit is stored in the per-cpu structure to reduce the data cache
+ * footprint.
+ */
+typedef struct cpucache_s {
+ unsigned int avail;
+ unsigned int limit;
+} cpucache_t;
+
+#define cc_entry(cpucache) \
+ ((void **)(((cpucache_t*)(cpucache))+1))
+#define cc_data(cachep) \
+ ((cachep)->cpudata[smp_processor_id()])
+/*
+ * kmem_cache_t
+ *
+ * manages a cache.
+ */
+
+#define CACHE_NAMELEN 20 /* max name length for a slab cache */
+
+struct kmem_cache_s {
+/* 1) each alloc & free */
+ /* full, partial first, then free */
+ struct list_head slabs_full;
+ struct list_head slabs_partial;
+ struct list_head slabs_free;
+ unsigned int objsize;
+ unsigned int flags; /* constant flags */
+ unsigned int num; /* # of objs per slab */
+ spinlock_t spinlock;
+#ifdef CONFIG_SMP
+ unsigned int batchcount;
+#endif
+
+/* 2) slab additions /removals */
+ /* order of pgs per slab (2^n) */
+ unsigned int gfporder;
+
+ /* force GFP flags, e.g. GFP_DMA */
+ unsigned int gfpflags;
+
+ size_t colour; /* cache colouring range */
+ unsigned int colour_off; /* colour offset */
+ unsigned int colour_next; /* cache colouring */
+ kmem_cache_t *slabp_cache;
+ unsigned int growing;
+ unsigned int dflags; /* dynamic flags */
+
+ /* constructor func */
+ void (*ctor)(void *, kmem_cache_t *, unsigned long);
+
+ /* de-constructor func */
+ void (*dtor)(void *, kmem_cache_t *, unsigned long);
+
+ unsigned long failures;
+
+/* 3) cache creation/removal */
+ char name[CACHE_NAMELEN];
+ struct list_head next;
+#ifdef CONFIG_SMP
+/* 4) per-cpu data */
+ cpucache_t *cpudata[NR_CPUS];
+#endif
+#if STATS
+ unsigned long num_active;
+ unsigned long num_allocations;
+ unsigned long high_mark;
+ unsigned long grown;
+ unsigned long reaped;
+ unsigned long errors;
+#ifdef CONFIG_SMP
+ atomic_t allochit;
+ atomic_t allocmiss;
+ atomic_t freehit;
+ atomic_t freemiss;
+#endif
+#endif
+};
+
+/* internal c_flags */
+#define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
+#define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */
+
+/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
+#define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */
+
+#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
+#define OPTIMIZE(x) ((x)->flags & CFLGS_OPTIMIZE)
+#define GROWN(x)	((x)->dflags & DFLGS_GROWN)
+
+#if STATS
+#define STATS_INC_ACTIVE(x) ((x)->num_active++)
+#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
+#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
+#define STATS_INC_GROWN(x) ((x)->grown++)
+#define STATS_INC_REAPED(x) ((x)->reaped++)
+#define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \
+ (x)->high_mark = (x)->num_active; \
+ } while (0)
+#define STATS_INC_ERR(x) ((x)->errors++)
+#else
+#define STATS_INC_ACTIVE(x) do { } while (0)
+#define STATS_DEC_ACTIVE(x) do { } while (0)
+#define STATS_INC_ALLOCED(x) do { } while (0)
+#define STATS_INC_GROWN(x) do { } while (0)
+#define STATS_INC_REAPED(x) do { } while (0)
+#define STATS_SET_HIGH(x) do { } while (0)
+#define STATS_INC_ERR(x) do { } while (0)
+#endif
+
+#if STATS && defined(CONFIG_SMP)
+#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
+#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
+#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
+#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
+#else
+#define STATS_INC_ALLOCHIT(x) do { } while (0)
+#define STATS_INC_ALLOCMISS(x) do { } while (0)
+#define STATS_INC_FREEHIT(x) do { } while (0)
+#define STATS_INC_FREEMISS(x) do { } while (0)
+#endif
+
+#if DEBUG
+/* Magic nums for obj red zoning.
+ * Placed in the first word before and the first word after an obj.
+ */
+#define RED_MAGIC1 0x5A2CF071UL /* when obj is active */
+#define RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
+
+/* ...and for poisoning */
+#define POISON_BYTE 0x5a /* byte value for poisoning */
+#define POISON_END 0xa5 /* end-byte of poisoning */
+
+#endif
+
+/* maximum size of an obj (in 2^order pages) */
+#define MAX_OBJ_ORDER 5 /* 32 pages */
+
+/*
+ * Do not go above this order unless not even a single object would
+ * otherwise fit into the slab.
+ */
+#define BREAK_GFP_ORDER_HI 2
+#define BREAK_GFP_ORDER_LO 1
+static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+
+/*
+ * Absolute limit for the gfp order
+ */
+#define MAX_GFP_ORDER 5 /* 32 pages */
+
+
+/* Macros for storing/retrieving the cachep and/or slab from the
+ * global 'mem_map'. These are used to find the slab an obj belongs to.
+ * With kfree(), these are used to find the cache an obj belongs to.
+ */
+#define SET_PAGE_CACHE(pg,x) ((pg)->list.next = (struct list_head *)(x))
+#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->list.next)
+#define SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x))
+#define GET_PAGE_SLAB(pg) ((slab_t *)(pg)->list.prev)
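+
+/*
+ * Thus, given any object pointer, the owning cache and slab are a
+ * single mem_map lookup away; this is exactly what kfree() and
+ * kmem_cache_free_one() do:
+ *
+ *	cachep = GET_PAGE_CACHE(virt_to_page(objp));
+ *	slabp  = GET_PAGE_SLAB(virt_to_page(objp));
+ */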
+
+/* Size description struct for general caches. */
+typedef struct cache_sizes {
+ size_t cs_size;
+ kmem_cache_t *cs_cachep;
+ kmem_cache_t *cs_dmacachep;
+} cache_sizes_t;
+
+static cache_sizes_t cache_sizes[] = {
+#if PAGE_SIZE == 4096
+ { 32, NULL, NULL},
+#endif
+ { 64, NULL, NULL},
+ { 128, NULL, NULL},
+ { 256, NULL, NULL},
+ { 512, NULL, NULL},
+ { 1024, NULL, NULL},
+ { 2048, NULL, NULL},
+ { 4096, NULL, NULL},
+ { 8192, NULL, NULL},
+ { 16384, NULL, NULL},
+ { 32768, NULL, NULL},
+ { 65536, NULL, NULL},
+ {131072, NULL, NULL},
+ { 0, NULL, NULL}
+};
+
+/* internal cache of cache description objs */
+static kmem_cache_t cache_cache = {
+ slabs_full: LIST_HEAD_INIT(cache_cache.slabs_full),
+ slabs_partial: LIST_HEAD_INIT(cache_cache.slabs_partial),
+ slabs_free: LIST_HEAD_INIT(cache_cache.slabs_free),
+ objsize: sizeof(kmem_cache_t),
+ flags: SLAB_NO_REAP,
+ spinlock: SPIN_LOCK_UNLOCKED,
+ colour_off: L1_CACHE_BYTES,
+ name: "kmem_cache",
+};
+
+/* Guard access to the cache-chain. */
+/* KAF: No semaphores, as we'll never wait around for I/O. */
+static spinlock_t cache_chain_sem;
+#define init_MUTEX(_m) spin_lock_init(_m)
+#define down(_m) spin_lock_irqsave(_m,spin_flags)
+#define up(_m) spin_unlock_irqrestore(_m,spin_flags)
+
+/* Clock-hand placemarker for the cache reaper. */
+static kmem_cache_t *clock_searchp = &cache_cache;
+
+#define cache_chain (cache_cache.next)
+
+#ifdef CONFIG_SMP
+/*
+ * chicken and egg problem: delay the per-cpu array allocation
+ * until the general caches are up.
+ */
+static int g_cpucache_up;
+
+static void enable_cpucache (kmem_cache_t *cachep);
+static void enable_all_cpucaches (void);
+#endif
+
+/* Calculate the number of objs, wastage, and bytes left over for a
+ * given slab size. */
+static void kmem_cache_estimate (unsigned long gfporder, size_t size,
+ int flags, size_t *left_over, unsigned int *num)
+{
+ int i;
+ size_t wastage = PAGE_SIZE<<gfporder;
+ size_t extra = 0;
+ size_t base = 0;
+
+ if (!(flags & CFLGS_OFF_SLAB)) {
+ base = sizeof(slab_t);
+ extra = sizeof(kmem_bufctl_t);
+ }
+ i = 0;
+ while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
+ i++;
+ if (i > 0)
+ i--;
+
+ if (i > SLAB_LIMIT)
+ i = SLAB_LIMIT;
+
+ *num = i;
+ wastage -= i*size;
+ wastage -= L1_CACHE_ALIGN(base+i*extra);
+ *left_over = wastage;
+}
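+
+/*
+ * A worked example (illustrative only; assumes 4 kB pages, 32-byte
+ * cache lines and a 24-byte slab_t): for a 64-byte on-slab cache at
+ * order 0, the loop settles on i = 59, since
+ * 59*64 + L1_CACHE_ALIGN(24 + 59*4) = 3776 + 288 = 4064 <= 4096,
+ * while i = 60 would need 4128 bytes. That leaves 32 bytes of wastage
+ * available for colouring.
+ */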
+
+/* Initialisation - setup the `cache' cache. */
+void __init kmem_cache_init(void)
+{
+ size_t left_over;
+
+ init_MUTEX(&cache_chain_sem);
+ INIT_LIST_HEAD(&cache_chain);
+
+ kmem_cache_estimate(0, cache_cache.objsize, 0,
+ &left_over, &cache_cache.num);
+ if (!cache_cache.num)
+ BUG();
+
+ cache_cache.colour = left_over/cache_cache.colour_off;
+ cache_cache.colour_next = 0;
+}
+
+
+/* Initialisation - setup remaining internal and general caches.
+ * Called after the gfp() functions have been enabled, and before smp_init().
+ */
+void __init kmem_cache_sizes_init(unsigned long num_physpages)
+{
+ cache_sizes_t *sizes = cache_sizes;
+ char name[20];
+ /*
+ * Fragmentation resistance on low memory - only use bigger
+ * page orders on machines with more than 32MB of memory.
+ */
+ if (num_physpages > (32 << 20) >> PAGE_SHIFT)
+ slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+ do {
+ /* For performance, all the general caches are L1 aligned.
+ * This should be particularly beneficial on SMP boxes, as it
+ * eliminates "false sharing".
+ * Note for systems short on memory removing the alignment will
+ * allow tighter packing of the smaller caches. */
+ sprintf(name,"size-%Zd",sizes->cs_size);
+ if (!(sizes->cs_cachep =
+ kmem_cache_create(name, sizes->cs_size,
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
+ BUG();
+ }
+
+ /* Inc off-slab bufctl limit until the ceiling is hit. */
+ if (!(OFF_SLAB(sizes->cs_cachep))) {
+ offslab_limit = sizes->cs_size-sizeof(slab_t);
+ offslab_limit /= 2;
+ }
+ sprintf(name, "size-%Zd(DMA)",sizes->cs_size);
+ sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
+ SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!sizes->cs_dmacachep)
+ BUG();
+ sizes++;
+ } while (sizes->cs_size);
+}
+
+int __init kmem_cpucache_init(void)
+{
+#ifdef CONFIG_SMP
+ g_cpucache_up = 1;
+ enable_all_cpucaches();
+#endif
+ return 0;
+}
+
+/*__initcall(kmem_cpucache_init);*/
+
+/* Interface to system's page allocator. No need to hold the cache-lock.
+ */
+static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
+{
+ void *addr;
+
+ /*
+ * If we requested dmaable memory, we will get it. Even if we
+ * did not request dmaable memory, we might get it, but that
+ * would be relatively rare and ignorable.
+ */
+ flags |= cachep->gfpflags;
+ addr = (void*) __get_free_pages(flags, cachep->gfporder);
+	/* Assume that now we have the pages, no one else can legally
+	 * mess with the 'struct page's.
+ * However vm_scan() might try to test the structure to see if
+ * it is a named-page or buffer-page. The members it tests are
+ * of no interest here.....
+ */
+ return addr;
+}
+
+/* Interface to system's page release. */
+static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
+{
+ unsigned long i = (1<<cachep->gfporder);
+ struct pfn_info *page = virt_to_page(addr);
+
+ /* free_pages() does not clear the type bit - we do that.
+ * The pages have been unlinked from their cache-slab,
+ * but their 'struct page's might be accessed in
+ * vm_scan(). Shouldn't be a worry.
+ */
+ while (i--) {
+ PageClearSlab(page);
+ page++;
+ }
+
+ free_pages((unsigned long)addr, cachep->gfporder);
+}
+
+#if DEBUG
+static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
+{
+ int size = cachep->objsize;
+ if (cachep->flags & SLAB_RED_ZONE) {
+ addr += BYTES_PER_WORD;
+ size -= 2*BYTES_PER_WORD;
+ }
+ memset(addr, POISON_BYTE, size);
+ *(unsigned char *)(addr+size-1) = POISON_END;
+}
+
+static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
+{
+ int size = cachep->objsize;
+ void *end;
+ if (cachep->flags & SLAB_RED_ZONE) {
+ addr += BYTES_PER_WORD;
+ size -= 2*BYTES_PER_WORD;
+ }
+ end = memchr(addr, POISON_END, size);
+ if (end != (addr+size-1))
+ return 1;
+ return 0;
+}
+#endif
+
+/* Destroy all the objs in a slab, and release the mem back to the system.
+ * Before calling, the slab must have been unlinked from the cache.
+ * The cache-lock is not held/needed.
+ */
+static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
+{
+ if (cachep->dtor
+#if DEBUG
+ || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
+#endif
+ ) {
+ int i;
+ for (i = 0; i < cachep->num; i++) {
+ void* objp = slabp->s_mem+cachep->objsize*i;
+#if DEBUG
+ if (cachep->flags & SLAB_RED_ZONE) {
+ if (*((unsigned long*)(objp)) != RED_MAGIC1)
+ BUG();
+ if (*((unsigned long*)(objp + cachep->objsize
+ -BYTES_PER_WORD)) != RED_MAGIC1)
+ BUG();
+ objp += BYTES_PER_WORD;
+ }
+#endif
+ if (cachep->dtor)
+ (cachep->dtor)(objp, cachep, 0);
+#if DEBUG
+ if (cachep->flags & SLAB_RED_ZONE) {
+ objp -= BYTES_PER_WORD;
+ }
+ if ((cachep->flags & SLAB_POISON) &&
+ kmem_check_poison_obj(cachep, objp))
+ BUG();
+#endif
+ }
+ }
+
+ kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
+ if (OFF_SLAB(cachep))
+ kmem_cache_free(cachep->slabp_cache, slabp);
+}
+
+/**
+ * kmem_cache_create - Create a cache.
+ * @name: A string which is used in /proc/slabinfo to identify this cache.
+ * @size: The size of objects to be created in this cache.
+ * @offset: The offset to use within the page.
+ * @flags: SLAB flags
+ * @ctor: A constructor for the objects.
+ * @dtor: A destructor for the objects.
+ *
+ * Returns a ptr to the cache on success, NULL on failure.
+ * Cannot be called within an interrupt, but can be interrupted.
+ * The @ctor is run when new pages are allocated by the cache
+ * and the @dtor is run before the pages are handed back.
+ * The flags are
+ *
+ * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
+ * to catch references to uninitialised memory.
+ *
+ * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
+ * for buffer overruns.
+ *
+ * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
+ * memory pressure.
+ *
+ * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
+ * cacheline. This can be beneficial if you're counting cycles as closely
+ * as davem.
+ */
+kmem_cache_t *
+kmem_cache_create (const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
+ void (*dtor)(void*, kmem_cache_t *, unsigned long))
+{
+ const char *func_nm = KERN_ERR "kmem_create: ";
+ size_t left_over, align, slab_size;
+ kmem_cache_t *cachep = NULL;
+ unsigned long spin_flags;
+
+ /*
+ * Sanity checks... these are all serious usage bugs.
+ */
+ if ((!name) ||
+ ((strlen(name) >= CACHE_NAMELEN - 1)) ||
+ (size < BYTES_PER_WORD) ||
+ (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
+ (dtor && !ctor) ||
+ (offset < 0 || offset > size))
+ BUG();
+
+#if DEBUG
+ if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
+		/* No constructor, but initial state check requested */
+ printk("%sNo con, but init state check requested - %s\n", func_nm, name);
+ flags &= ~SLAB_DEBUG_INITIAL;
+ }
+
+ if ((flags & SLAB_POISON) && ctor) {
+ /* request for poisoning, but we can't do that with a constructor */
+ printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
+ flags &= ~SLAB_POISON;
+ }
+#if FORCED_DEBUG
+ if (size < (PAGE_SIZE>>3))
+ /*
+ * do not red zone large object, causes severe
+ * fragmentation.
+ */
+ flags |= SLAB_RED_ZONE;
+ if (!ctor)
+ flags |= SLAB_POISON;
+#endif
+#endif
+
+ /*
+	 * Always check flags; a caller might be expecting debug
+ * support which isn't available.
+ */
+ if (flags & ~CREATE_MASK)
+ BUG();
+
+ /* Get cache's description obj. */
+ cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
+ if (!cachep)
+ goto opps;
+ memset(cachep, 0, sizeof(kmem_cache_t));
+
+ /* Check that size is in terms of words. This is needed to avoid
+ * unaligned accesses for some archs when redzoning is used, and makes
+ * sure any on-slab bufctl's are also correctly aligned.
+ */
+ if (size & (BYTES_PER_WORD-1)) {
+ size += (BYTES_PER_WORD-1);
+ size &= ~(BYTES_PER_WORD-1);
+ printk("%sForcing size word alignment - %s\n", func_nm, name);
+ }
+
+#if DEBUG
+ if (flags & SLAB_RED_ZONE) {
+ /*
+ * There is no point trying to honour cache alignment
+ * when redzoning.
+ */
+ flags &= ~SLAB_HWCACHE_ALIGN;
+ size += 2*BYTES_PER_WORD; /* words for redzone */
+ }
+#endif
+ align = BYTES_PER_WORD;
+ if (flags & SLAB_HWCACHE_ALIGN)
+ align = L1_CACHE_BYTES;
+
+ /* Determine if the slab management is 'on' or 'off' slab. */
+ if (size >= (PAGE_SIZE>>3))
+ /*
+ * Size is large, assume best to place the slab management obj
+ * off-slab (should allow better packing of objs).
+ */
+ flags |= CFLGS_OFF_SLAB;
+
+ if (flags & SLAB_HWCACHE_ALIGN) {
+ /* Need to adjust size so that objs are cache aligned. */
+ /* Small obj size, can get at least two per cache line. */
+		/* FIXME: only powers of 2 are supported; the old scheme was better */
+ while (size < align/2)
+ align /= 2;
+ size = (size+align-1)&(~(align-1));
+ }
+
+	/* Calculate size (in pages) of slabs, and the number of objs per slab.
+ * This could be made much more intelligent. For now, try to avoid
+ * using high page-orders for slabs. When the gfp() funcs are more
+ * friendly towards high-order requests, this should be changed.
+ */
+ do {
+ unsigned int break_flag = 0;
+cal_wastage:
+ kmem_cache_estimate(cachep->gfporder, size, flags,
+ &left_over, &cachep->num);
+ if (break_flag)
+ break;
+ if (cachep->gfporder >= MAX_GFP_ORDER)
+ break;
+ if (!cachep->num)
+ goto next;
+ if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
+ /* Oops, this num of objs will cause problems. */
+ cachep->gfporder--;
+ break_flag++;
+ goto cal_wastage;
+ }
+
+ /*
+		 * A large number of objs is good, but very large slabs are
+		 * currently bad for the gfp()s.
+ */
+ if (cachep->gfporder >= slab_break_gfp_order)
+ break;
+
+ if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
+ break; /* Acceptable internal fragmentation. */
+next:
+ cachep->gfporder++;
+ } while (1);
+
+ if (!cachep->num) {
+ printk("kmem_cache_create: couldn't create cache %s.\n", name);
+ kmem_cache_free(&cache_cache, cachep);
+ cachep = NULL;
+ goto opps;
+ }
+ slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
+
+ /*
+ * If the slab has been placed off-slab, and we have enough space then
+ * move it on-slab. This is at the expense of any extra colouring.
+ */
+ if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
+ flags &= ~CFLGS_OFF_SLAB;
+ left_over -= slab_size;
+ }
+
+ /* Offset must be a multiple of the alignment. */
+ offset += (align-1);
+ offset &= ~(align-1);
+ if (!offset)
+ offset = L1_CACHE_BYTES;
+ cachep->colour_off = offset;
+ cachep->colour = left_over/offset;
+
+ /* init remaining fields */
+ if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
+ flags |= CFLGS_OPTIMIZE;
+
+ cachep->flags = flags;
+ cachep->gfpflags = 0;
+ if (flags & SLAB_CACHE_DMA)
+ cachep->gfpflags |= GFP_DMA;
+ spin_lock_init(&cachep->spinlock);
+ cachep->objsize = size;
+ INIT_LIST_HEAD(&cachep->slabs_full);
+ INIT_LIST_HEAD(&cachep->slabs_partial);
+ INIT_LIST_HEAD(&cachep->slabs_free);
+
+ if (flags & CFLGS_OFF_SLAB)
+ cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
+ cachep->ctor = ctor;
+ cachep->dtor = dtor;
+ /* Copy name over so we don't have problems with unloaded modules */
+ strcpy(cachep->name, name);
+
+#ifdef CONFIG_SMP
+ if (g_cpucache_up)
+ enable_cpucache(cachep);
+#endif
+ /* Need the semaphore to access the chain. */
+ down(&cache_chain_sem);
+ {
+ struct list_head *p;
+
+ list_for_each(p, &cache_chain) {
+ kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
+
+ /* The name field is constant - no lock needed. */
+ if (!strcmp(pc->name, name))
+ BUG();
+ }
+ }
+
+ /* There is no reason to lock our new cache before we
+ * link it in - no one knows about it yet...
+ */
+ list_add(&cachep->next, &cache_chain);
+ up(&cache_chain_sem);
+opps:
+ return cachep;
+}
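+
+/*
+ * Typical usage (a sketch only; the "foo" names are illustrative, not
+ * part of this file):
+ *
+ *	static kmem_cache_t *foo_cachep;
+ *
+ *	foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
+ *				       SLAB_HWCACHE_ALIGN, NULL, NULL);
+ *	if (!foo_cachep)
+ *		BUG();
+ *
+ *	f = kmem_cache_alloc(foo_cachep, SLAB_KERNEL);
+ *	...
+ *	kmem_cache_free(foo_cachep, f);
+ */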
+
+
+#if DEBUG
+/*
+ * This checks whether the kmem_cache_t pointer is chained in the
+ * cache_cache list. -arca
+ */
+static int is_chained_kmem_cache(kmem_cache_t * cachep)
+{
+ struct list_head *p;
+ int ret = 0;
+ unsigned long spin_flags;
+
+ /* Find the cache in the chain of caches. */
+ down(&cache_chain_sem);
+ list_for_each(p, &cache_chain) {
+ if (p == &cachep->next) {
+ ret = 1;
+ break;
+ }
+ }
+ up(&cache_chain_sem);
+
+ return ret;
+}
+#else
+#define is_chained_kmem_cache(x) 1
+#endif
+
+#ifdef CONFIG_SMP
+/*
+ * Waits for all CPUs to execute func().
+ */
+static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
+{
+ local_irq_disable();
+ func(arg);
+ local_irq_enable();
+
+ if (smp_call_function(func, arg, 1, 1))
+ BUG();
+}
+typedef struct ccupdate_struct_s
+{
+ kmem_cache_t *cachep;
+ cpucache_t *new[NR_CPUS];
+} ccupdate_struct_t;
+
+static void do_ccupdate_local(void *info)
+{
+ ccupdate_struct_t *new = (ccupdate_struct_t *)info;
+ cpucache_t *old = cc_data(new->cachep);
+
+ cc_data(new->cachep) = new->new[smp_processor_id()];
+ new->new[smp_processor_id()] = old;
+}
+
+static void free_block (kmem_cache_t* cachep, void** objpp, int len);
+
+static void drain_cpu_caches(kmem_cache_t *cachep)
+{
+ ccupdate_struct_t new;
+ int i;
+ unsigned long spin_flags;
+
+ memset(&new.new,0,sizeof(new.new));
+
+ new.cachep = cachep;
+
+ down(&cache_chain_sem);
+ smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+
+ for (i = 0; i < smp_num_cpus; i++) {
+ cpucache_t* ccold = new.new[cpu_logical_map(i)];
+ if (!ccold || (ccold->avail == 0))
+ continue;
+ local_irq_disable();
+ free_block(cachep, cc_entry(ccold), ccold->avail);
+ local_irq_enable();
+ ccold->avail = 0;
+ }
+ smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+ up(&cache_chain_sem);
+}
+
+#else
+#define drain_cpu_caches(cachep) do { } while (0)
+#endif
+
+static int __kmem_cache_shrink(kmem_cache_t *cachep)
+{
+ slab_t *slabp;
+ int ret;
+
+ drain_cpu_caches(cachep);
+
+ spin_lock_irq(&cachep->spinlock);
+
+ /* If the cache is growing, stop shrinking. */
+ while (!cachep->growing) {
+ struct list_head *p;
+
+ p = cachep->slabs_free.prev;
+ if (p == &cachep->slabs_free)
+ break;
+
+ slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
+#if DEBUG
+ if (slabp->inuse)
+ BUG();
+#endif
+ list_del(&slabp->list);
+
+ spin_unlock_irq(&cachep->spinlock);
+ kmem_slab_destroy(cachep, slabp);
+ spin_lock_irq(&cachep->spinlock);
+ }
+ ret = !list_empty(&cachep->slabs_full) || !list_empty(&cachep->slabs_partial);
+ spin_unlock_irq(&cachep->spinlock);
+ return ret;
+}
+
+/**
+ * kmem_cache_shrink - Shrink a cache.
+ * @cachep: The cache to shrink.
+ *
+ * Releases as many slabs as possible for a cache.
+ * To help debugging, a zero exit status indicates all slabs were released.
+ */
+int kmem_cache_shrink(kmem_cache_t *cachep)
+{
+ if (!cachep || !is_chained_kmem_cache(cachep))
+ BUG();
+
+ return __kmem_cache_shrink(cachep);
+}
+
+/**
+ * kmem_cache_destroy - delete a cache
+ * @cachep: the cache to destroy
+ *
+ * Remove a kmem_cache_t object from the slab cache.
+ * Returns 0 on success.
+ *
+ * It is expected this function will be called by a module when it is
+ * unloaded. This will remove the cache completely, and avoid a duplicate
+ * cache being allocated each time a module is loaded and unloaded, if the
+ * module doesn't have persistent in-kernel storage across loads and unloads.
+ *
+ * The caller must guarantee that no one will allocate memory from the cache
+ * during the kmem_cache_destroy().
+ */
+int kmem_cache_destroy (kmem_cache_t * cachep)
+{
+ unsigned long spin_flags;
+
+ if (!cachep || cachep->growing)
+ BUG();
+
+ /* Find the cache in the chain of caches. */
+ down(&cache_chain_sem);
+ /* the chain is never empty, cache_cache is never destroyed */
+ if (clock_searchp == cachep)
+ clock_searchp = list_entry(cachep->next.next,
+ kmem_cache_t, next);
+ list_del(&cachep->next);
+ up(&cache_chain_sem);
+
+ if (__kmem_cache_shrink(cachep)) {
+ printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
+ cachep);
+ down(&cache_chain_sem);
+ list_add(&cachep->next,&cache_chain);
+ up(&cache_chain_sem);
+ return 1;
+ }
+#ifdef CONFIG_SMP
+ {
+ int i;
+ for (i = 0; i < NR_CPUS; i++)
+ kfree(cachep->cpudata[i]);
+ }
+#endif
+ kmem_cache_free(&cache_cache, cachep);
+
+ return 0;
+}
+
+/* Get the memory for a slab management obj. */
+static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
+ void *objp, int colour_off, int local_flags)
+{
+ slab_t *slabp;
+
+ if (OFF_SLAB(cachep)) {
+ /* Slab management obj is off-slab. */
+ slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+ if (!slabp)
+ return NULL;
+ } else {
+ /* FIXME: change to
+ slabp = objp
+ * if you enable OPTIMIZE
+ */
+ slabp = objp+colour_off;
+ colour_off += L1_CACHE_ALIGN(cachep->num *
+ sizeof(kmem_bufctl_t) + sizeof(slab_t));
+ }
+ slabp->inuse = 0;
+ slabp->colouroff = colour_off;
+ slabp->s_mem = objp+colour_off;
+
+ return slabp;
+}
+
+static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
+ slab_t * slabp, unsigned long ctor_flags)
+{
+ int i;
+
+ for (i = 0; i < cachep->num; i++) {
+ void* objp = slabp->s_mem+cachep->objsize*i;
+#if DEBUG
+ if (cachep->flags & SLAB_RED_ZONE) {
+ *((unsigned long*)(objp)) = RED_MAGIC1;
+ *((unsigned long*)(objp + cachep->objsize -
+ BYTES_PER_WORD)) = RED_MAGIC1;
+ objp += BYTES_PER_WORD;
+ }
+#endif
+
+ /*
+ * Constructors are not allowed to allocate memory from
+ * the same cache which they are a constructor for.
+ * Otherwise, deadlock. They must also be threaded.
+ */
+ if (cachep->ctor)
+ cachep->ctor(objp, cachep, ctor_flags);
+#if DEBUG
+ if (cachep->flags & SLAB_RED_ZONE)
+ objp -= BYTES_PER_WORD;
+ if (cachep->flags & SLAB_POISON)
+ /* need to poison the objs */
+ kmem_poison_obj(cachep, objp);
+ if (cachep->flags & SLAB_RED_ZONE) {
+ if (*((unsigned long*)(objp)) != RED_MAGIC1)
+ BUG();
+ if (*((unsigned long*)(objp + cachep->objsize -
+ BYTES_PER_WORD)) != RED_MAGIC1)
+ BUG();
+ }
+#endif
+ slab_bufctl(slabp)[i] = i+1;
+ }
+ slab_bufctl(slabp)[i-1] = BUFCTL_END;
+ slabp->free = 0;
+}
+
+/*
+ * Grow (by 1) the number of slabs within a cache. This is called by
+ * kmem_cache_alloc() when there are no active objs left in a cache.
+ */
+static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
+{
+ slab_t *slabp;
+ struct pfn_info *page; unsigned int i;
+ void *objp;
+ size_t offset;
+ unsigned int local_flags;
+ unsigned long ctor_flags;
+ unsigned long save_flags;
+
+ /* Be lazy and only check for valid flags here,
+ * keeping it out of the critical path in kmem_cache_alloc().
+ */
+ if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
+ BUG();
+ if (flags & SLAB_NO_GROW)
+ return 0;
+
+#if 0
+ if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
+ BUG();
+#endif
+
+ ctor_flags = SLAB_CTOR_CONSTRUCTOR;
+ local_flags = (flags & SLAB_LEVEL_MASK);
+ if (local_flags == SLAB_ATOMIC)
+ /*
+ * Not allowed to sleep. Need to tell a constructor about
+ * this - it might need to know...
+ */
+ ctor_flags |= SLAB_CTOR_ATOMIC;
+
+ /* About to mess with non-constant members - lock. */
+ spin_lock_irqsave(&cachep->spinlock, save_flags);
+
+	/* Get colour for the slab, and calculate the next value. */
+ offset = cachep->colour_next;
+ cachep->colour_next++;
+ if (cachep->colour_next >= cachep->colour)
+ cachep->colour_next = 0;
+ offset *= cachep->colour_off;
+ cachep->dflags |= DFLGS_GROWN;
+
+ cachep->growing++;
+ spin_unlock_irqrestore(&cachep->spinlock, save_flags);
+
+ /* A series of memory allocations for a new slab.
+	 * Neither the cache-chain semaphore nor the cache-lock is
+	 * held, but the incremented c_growing prevents this
+	 * cache from being reaped or shrunk.
+	 * Note: The cache could be selected for reaping in
+	 * kmem_cache_reap(), but when the final test is made the
+	 * growing value will be seen.
+ */
+
+ /* Get mem for the objs. */
+ if (!(objp = kmem_getpages(cachep, flags)))
+ goto failed;
+
+ /* Get slab management. */
+ if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
+ goto opps1;
+
+ /* Nasty!!!!!! I hope this is OK. */
+ i = 1 << cachep->gfporder;
+ page = virt_to_page(objp);
+ do {
+ SET_PAGE_CACHE(page, cachep);
+ SET_PAGE_SLAB(page, slabp);
+ PageSetSlab(page);
+ page++;
+ } while (--i);
+
+ kmem_cache_init_objs(cachep, slabp, ctor_flags);
+
+ spin_lock_irqsave(&cachep->spinlock, save_flags);
+ cachep->growing--;
+
+ /* Make slab active. */
+ list_add_tail(&slabp->list, &cachep->slabs_free);
+ STATS_INC_GROWN(cachep);
+ cachep->failures = 0;
+
+ spin_unlock_irqrestore(&cachep->spinlock, save_flags);
+ return 1;
+opps1:
+ kmem_freepages(cachep, objp);
+failed:
+ spin_lock_irqsave(&cachep->spinlock, save_flags);
+ cachep->growing--;
+ spin_unlock_irqrestore(&cachep->spinlock, save_flags);
+ return 0;
+}
+
+/*
+ * Perform extra freeing checks:
+ * - detect double free
+ * - detect bad pointers.
+ * Called with the cache-lock held.
+ */
+
+#if DEBUG
+static int kmem_extra_free_checks (kmem_cache_t * cachep,
+ slab_t *slabp, void * objp)
+{
+ int i;
+ unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
+
+ if (objnr >= cachep->num)
+ BUG();
+ if (objp != slabp->s_mem + objnr*cachep->objsize)
+ BUG();
+
+ /* Check slab's freelist to see if this obj is there. */
+ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
+ if (i == objnr)
+ BUG();
+ }
+ return 0;
+}
+#endif
+
+static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
+{
+ if (flags & SLAB_DMA) {
+ if (!(cachep->gfpflags & GFP_DMA))
+ BUG();
+ } else {
+ if (cachep->gfpflags & GFP_DMA)
+ BUG();
+ }
+}
+
+static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
+ slab_t *slabp)
+{
+ void *objp;
+
+ STATS_INC_ALLOCED(cachep);
+ STATS_INC_ACTIVE(cachep);
+ STATS_SET_HIGH(cachep);
+
+ /* get obj pointer */
+ slabp->inuse++;
+ objp = slabp->s_mem + slabp->free*cachep->objsize;
+ slabp->free=slab_bufctl(slabp)[slabp->free];
+
+ if (unlikely(slabp->free == BUFCTL_END)) {
+ list_del(&slabp->list);
+ list_add(&slabp->list, &cachep->slabs_full);
+ }
+#if DEBUG
+ if (cachep->flags & SLAB_POISON)
+ if (kmem_check_poison_obj(cachep, objp))
+ BUG();
+ if (cachep->flags & SLAB_RED_ZONE) {
+ /* Set alloc red-zone, and check old one. */
+ if (xchg((unsigned long *)objp, RED_MAGIC2) !=
+ RED_MAGIC1)
+ BUG();
+ if (xchg((unsigned long *)(objp+cachep->objsize -
+ BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
+ BUG();
+ objp += BYTES_PER_WORD;
+ }
+#endif
+ return objp;
+}
+
+/*
+ * Returns a ptr to an obj in the given cache.
+ * caller must guarantee synchronization
+ * #define for the goto optimization 8-)
+ */
+#define kmem_cache_alloc_one(cachep) \
+({ \
+ struct list_head * slabs_partial, * entry; \
+ slab_t *slabp; \
+ \
+ slabs_partial = &(cachep)->slabs_partial; \
+ entry = slabs_partial->next; \
+ if (unlikely(entry == slabs_partial)) { \
+ struct list_head * slabs_free; \
+ slabs_free = &(cachep)->slabs_free; \
+ entry = slabs_free->next; \
+ if (unlikely(entry == slabs_free)) \
+ goto alloc_new_slab; \
+ list_del(entry); \
+ list_add(entry, slabs_partial); \
+ } \
+ \
+ slabp = list_entry(entry, slab_t, list); \
+ kmem_cache_alloc_one_tail(cachep, slabp); \
+})
+
+#ifdef CONFIG_SMP
+void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
+{
+ int batchcount = cachep->batchcount;
+ cpucache_t* cc = cc_data(cachep);
+
+ spin_lock(&cachep->spinlock);
+ while (batchcount--) {
+ struct list_head * slabs_partial, * entry;
+ slab_t *slabp;
+		/* Get the slab from which the allocation will come. */
+ slabs_partial = &(cachep)->slabs_partial;
+ entry = slabs_partial->next;
+ if (unlikely(entry == slabs_partial)) {
+ struct list_head * slabs_free;
+ slabs_free = &(cachep)->slabs_free;
+ entry = slabs_free->next;
+ if (unlikely(entry == slabs_free))
+ break;
+ list_del(entry);
+ list_add(entry, slabs_partial);
+ }
+
+ slabp = list_entry(entry, slab_t, list);
+ cc_entry(cc)[cc->avail++] =
+ kmem_cache_alloc_one_tail(cachep, slabp);
+ }
+ spin_unlock(&cachep->spinlock);
+
+ if (cc->avail)
+ return cc_entry(cc)[--cc->avail];
+ return NULL;
+}
+#endif
+
+static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
+{
+ unsigned long save_flags;
+ void* objp;
+
+ kmem_cache_alloc_head(cachep, flags);
+try_again:
+ local_irq_save(save_flags);
+#ifdef CONFIG_SMP
+ {
+ cpucache_t *cc = cc_data(cachep);
+
+ if (cc) {
+ if (cc->avail) {
+ STATS_INC_ALLOCHIT(cachep);
+ objp = cc_entry(cc)[--cc->avail];
+ } else {
+ STATS_INC_ALLOCMISS(cachep);
+ objp = kmem_cache_alloc_batch(cachep,flags);
+ if (!objp)
+ goto alloc_new_slab_nolock;
+ }
+ } else {
+ spin_lock(&cachep->spinlock);
+ objp = kmem_cache_alloc_one(cachep);
+ spin_unlock(&cachep->spinlock);
+ }
+ }
+#else
+ objp = kmem_cache_alloc_one(cachep);
+#endif
+ local_irq_restore(save_flags);
+ return objp;
+alloc_new_slab:
+#ifdef CONFIG_SMP
+ spin_unlock(&cachep->spinlock);
+alloc_new_slab_nolock:
+#endif
+ local_irq_restore(save_flags);
+ if (kmem_cache_grow(cachep, flags))
+ /* Someone may have stolen our objs. Doesn't matter, we'll
+ * just come back here again.
+ */
+ goto try_again;
+ return NULL;
+}
+
+/*
+ * Release an obj back to its cache. If the obj has a constructed
+ * state, it should be in this state _before_ it is released.
+ * - caller is responsible for the synchronization
+ */
+
+#if DEBUG
+# define CHECK_NR(pg) \
+ do { \
+ if (!VALID_PAGE(pg)) { \
+ printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
+ (unsigned long)objp); \
+ BUG(); \
+ } \
+ } while (0)
+# define CHECK_PAGE(page) \
+ do { \
+ CHECK_NR(page); \
+ if (!PageSlab(page)) { \
+ printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
+ (unsigned long)objp); \
+ BUG(); \
+ } \
+ } while (0)
+
+#else
+# define CHECK_PAGE(pg) do { } while (0)
+#endif
+
+static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
+{
+ slab_t* slabp;
+
+ CHECK_PAGE(virt_to_page(objp));
+ /* reduces memory footprint
+ *
+ if (OPTIMIZE(cachep))
+ slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
+ else
+ */
+ slabp = GET_PAGE_SLAB(virt_to_page(objp));
+
+#if DEBUG
+ if (cachep->flags & SLAB_DEBUG_INITIAL)
+ /* Need to call the slab's constructor so the
+		 * caller can verify its state (debugging).
+ * Called without the cache-lock held.
+ */
+ cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
+
+ if (cachep->flags & SLAB_RED_ZONE) {
+ objp -= BYTES_PER_WORD;
+ if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
+ /* Either write before start, or a double free. */
+ BUG();
+ if (xchg((unsigned long *)(objp+cachep->objsize -
+ BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
+ /* Either write past end, or a double free. */
+ BUG();
+ }
+ if (cachep->flags & SLAB_POISON)
+ kmem_poison_obj(cachep, objp);
+ if (kmem_extra_free_checks(cachep, slabp, objp))
+ return;
+#endif
+ {
+ unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
+
+ slab_bufctl(slabp)[objnr] = slabp->free;
+ slabp->free = objnr;
+ }
+ STATS_DEC_ACTIVE(cachep);
+
+ /* fixup slab chains */
+ {
+ int inuse = slabp->inuse;
+ if (unlikely(!--slabp->inuse)) {
+ /* Was partial or full, now empty. */
+ list_del(&slabp->list);
+ list_add(&slabp->list, &cachep->slabs_free);
+ } else if (unlikely(inuse == cachep->num)) {
+ /* Was full. */
+ list_del(&slabp->list);
+ list_add(&slabp->list, &cachep->slabs_partial);
+ }
+ }
+}
+
+#ifdef CONFIG_SMP
+static inline void __free_block (kmem_cache_t* cachep,
+ void** objpp, int len)
+{
+ for ( ; len > 0; len--, objpp++)
+ kmem_cache_free_one(cachep, *objpp);
+}
+
+static void free_block (kmem_cache_t* cachep, void** objpp, int len)
+{
+ spin_lock(&cachep->spinlock);
+ __free_block(cachep, objpp, len);
+ spin_unlock(&cachep->spinlock);
+}
+#endif
+
+/*
+ * __kmem_cache_free
+ * called with disabled ints
+ */
+static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
+{
+#ifdef CONFIG_SMP
+ cpucache_t *cc = cc_data(cachep);
+
+ CHECK_PAGE(virt_to_page(objp));
+ if (cc) {
+ int batchcount;
+ if (cc->avail < cc->limit) {
+ STATS_INC_FREEHIT(cachep);
+ cc_entry(cc)[cc->avail++] = objp;
+ return;
+ }
+ STATS_INC_FREEMISS(cachep);
+ batchcount = cachep->batchcount;
+ cc->avail -= batchcount;
+ free_block(cachep,
+ &cc_entry(cc)[cc->avail],batchcount);
+ cc_entry(cc)[cc->avail++] = objp;
+ return;
+ } else {
+ free_block(cachep, &objp, 1);
+ }
+#else
+ kmem_cache_free_one(cachep, objp);
+#endif
+}
+
+/**
+ * kmem_cache_alloc - Allocate an object
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ *
+ * Allocate an object from this cache. The flags are only relevant
+ * if the cache has no available objects.
+ */
+void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
+{
+ return __kmem_cache_alloc(cachep, flags);
+}
+
+/**
+ * kmalloc - allocate memory
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * kmalloc is the normal method of allocating memory
+ * in the kernel.
+ *
+ * The @flags argument may be one of:
+ *
+ * %GFP_USER - Allocate memory on behalf of user. May sleep.
+ *
+ * %GFP_KERNEL - Allocate normal kernel ram. May sleep.
+ *
+ * %GFP_ATOMIC - Allocation will not sleep. Use inside interrupt handlers.
+ *
+ * Additionally, the %GFP_DMA flag may be set to indicate the memory
+ * must be suitable for DMA. This can mean different things on different
+ * platforms. For example, on i386, it means that the memory must come
+ * from the first 16MB.
+ */
+void * kmalloc (size_t size, int flags)
+{
+ cache_sizes_t *csizep = cache_sizes;
+
+ for (; csizep->cs_size; csizep++) {
+ if (size > csizep->cs_size)
+ continue;
+ return __kmem_cache_alloc(flags & GFP_DMA ?
+ csizep->cs_dmacachep : csizep->cs_cachep, flags);
+ }
+ return NULL;
+}
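+
+/*
+ * Example (illustrative): a request is rounded up to the first
+ * general-cache size that fits, so a 100-byte request is served from
+ * the size-128 cache, and kfree() later finds that cache again via
+ * the object's struct page:
+ *
+ *	char *buf = kmalloc(100, GFP_KERNEL);
+ *	if (!buf)
+ *		return -ENOMEM;
+ *	...
+ *	kfree(buf);
+ */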
+
+/**
+ * kmem_cache_free - Deallocate an object
+ * @cachep: The cache the allocation was from.
+ * @objp: The previously allocated object.
+ *
+ * Free an object which was previously allocated from this
+ * cache.
+ */
+void kmem_cache_free (kmem_cache_t *cachep, void *objp)
+{
+ unsigned long flags;
+#if DEBUG
+ CHECK_PAGE(virt_to_page(objp));
+ if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
+ BUG();
+#endif
+
+ local_irq_save(flags);
+ __kmem_cache_free(cachep, objp);
+ local_irq_restore(flags);
+}
+
+/**
+ * kfree - free previously allocated memory
+ * @objp: pointer returned by kmalloc.
+ *
+ * Don't free memory not originally allocated by kmalloc()
+ * or you will run into trouble.
+ */
+void kfree (const void *objp)
+{
+ kmem_cache_t *c;
+ unsigned long flags;
+
+ if (!objp)
+ return;
+ local_irq_save(flags);
+ CHECK_PAGE(virt_to_page(objp));
+ c = GET_PAGE_CACHE(virt_to_page(objp));
+ __kmem_cache_free(c, (void*)objp);
+ local_irq_restore(flags);
+}
+
+kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
+{
+ cache_sizes_t *csizep = cache_sizes;
+
+ /* This function could be moved to the header file, and
+ * made inline so consumers can quickly determine what
+ * cache pointer they require.
+ */
+ for ( ; csizep->cs_size; csizep++) {
+ if (size > csizep->cs_size)
+ continue;
+ break;
+ }
+ return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
+}
+
+#ifdef CONFIG_SMP
+
+/* called with cache_chain_sem acquired. */
+static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
+{
+ ccupdate_struct_t new;
+ int i;
+
+ /*
+ * These are admin-provided, so we are more graceful.
+ */
+ if (limit < 0)
+ return -EINVAL;
+ if (batchcount < 0)
+ return -EINVAL;
+ if (batchcount > limit)
+ return -EINVAL;
+ if (limit != 0 && !batchcount)
+ return -EINVAL;
+
+ memset(&new.new,0,sizeof(new.new));
+ if (limit) {
+ for (i = 0; i< smp_num_cpus; i++) {
+ cpucache_t* ccnew;
+
+ ccnew = kmalloc(sizeof(void*)*limit+
+ sizeof(cpucache_t), GFP_KERNEL);
+ if (!ccnew)
+ goto oom;
+ ccnew->limit = limit;
+ ccnew->avail = 0;
+ new.new[cpu_logical_map(i)] = ccnew;
+ }
+ }
+ new.cachep = cachep;
+ spin_lock_irq(&cachep->spinlock);
+ cachep->batchcount = batchcount;
+ spin_unlock_irq(&cachep->spinlock);
+
+ smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+
+ for (i = 0; i < smp_num_cpus; i++) {
+ cpucache_t* ccold = new.new[cpu_logical_map(i)];
+ if (!ccold)
+ continue;
+ local_irq_disable();
+ free_block(cachep, cc_entry(ccold), ccold->avail);
+ local_irq_enable();
+ kfree(ccold);
+ }
+ return 0;
+oom:
+ for (i--; i >= 0; i--)
+ kfree(new.new[cpu_logical_map(i)]);
+ return -ENOMEM;
+}
+
+static void enable_cpucache (kmem_cache_t *cachep)
+{
+ int err;
+ int limit;
+
+ /* FIXME: optimize */
+ if (cachep->objsize > PAGE_SIZE)
+ return;
+ if (cachep->objsize > 1024)
+ limit = 60;
+ else if (cachep->objsize > 256)
+ limit = 124;
+ else
+ limit = 252;
+
+ err = kmem_tune_cpucache(cachep, limit, limit/2);
+ if (err)
+ printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
+ cachep->name, -err);
+}
+
+static void enable_all_cpucaches (void)
+{
+ struct list_head* p;
+ unsigned long spin_flags;
+
+ down(&cache_chain_sem);
+
+ p = &cache_cache.next;
+ do {
+ kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
+
+ enable_cpucache(cachep);
+ p = cachep->next.next;
+ } while (p != &cache_cache.next);
+
+ up(&cache_chain_sem);
+}
+#endif
+
+/**
+ * kmem_cache_reap - Reclaim memory from caches.
+ * @gfp_mask: the type of memory required.
+ *
+ * Called from do_try_to_free_pages() and __alloc_pages()
+ */
+int kmem_cache_reap (int gfp_mask)
+{
+ slab_t *slabp;
+ kmem_cache_t *searchp;
+ kmem_cache_t *best_cachep;
+ unsigned int best_pages;
+ unsigned int best_len;
+ unsigned int scan;
+ int ret = 0;
+ unsigned long spin_flags;
+
+ down(&cache_chain_sem);
+
+ scan = REAP_SCANLEN;
+ best_len = 0;
+ best_pages = 0;
+ best_cachep = NULL;
+ searchp = clock_searchp;
+ do {
+ unsigned int pages;
+ struct list_head* p;
+ unsigned int full_free;
+
+ /* It's safe to test this without holding the cache-lock. */
+ if (searchp->flags & SLAB_NO_REAP)
+ goto next;
+ spin_lock_irq(&searchp->spinlock);
+ if (searchp->growing)
+ goto next_unlock;
+ if (searchp->dflags & DFLGS_GROWN) {
+ searchp->dflags &= ~DFLGS_GROWN;
+ goto next_unlock;
+ }
+#ifdef CONFIG_SMP
+ {
+ cpucache_t *cc = cc_data(searchp);
+ if (cc && cc->avail) {
+ __free_block(searchp, cc_entry(cc), cc->avail);
+ cc->avail = 0;
+ }
+ }
+#endif
+
+ full_free = 0;
+ p = searchp->slabs_free.next;
+ while (p != &searchp->slabs_free) {
+ slabp = list_entry(p, slab_t, list);
+#if DEBUG
+ if (slabp->inuse)
+ BUG();
+#endif
+ full_free++;
+ p = p->next;
+ }
+
+ /*
+ * Try to avoid slabs with constructors and/or
+ * more than one page per slab (as it can be difficult
+ * to get high orders from gfp()).
+ */
+ pages = full_free * (1<<searchp->gfporder);
+ if (searchp->ctor)
+ pages = (pages*4+1)/5;
+ if (searchp->gfporder)
+ pages = (pages*4+1)/5;
+ if (pages > best_pages) {
+ best_cachep = searchp;
+ best_len = full_free;
+ best_pages = pages;
+ if (pages >= REAP_PERFECT) {
+ clock_searchp = list_entry(searchp->next.next,
+ kmem_cache_t,next);
+ goto perfect;
+ }
+ }
+next_unlock:
+ spin_unlock_irq(&searchp->spinlock);
+next:
+ searchp = list_entry(searchp->next.next,kmem_cache_t,next);
+ } while (--scan && searchp != clock_searchp);
+
+ clock_searchp = searchp;
+
+ if (!best_cachep)
+ /* couldn't find anything to reap */
+ goto out;
+
+ spin_lock_irq(&best_cachep->spinlock);
+perfect:
+ /* free only 50% of the free slabs */
+ best_len = (best_len + 1)/2;
+ for (scan = 0; scan < best_len; scan++) {
+ struct list_head *p;
+
+ if (best_cachep->growing)
+ break;
+ p = best_cachep->slabs_free.prev;
+ if (p == &best_cachep->slabs_free)
+ break;
+ slabp = list_entry(p,slab_t,list);
+#if DEBUG
+ if (slabp->inuse)
+ BUG();
+#endif
+ list_del(&slabp->list);
+ STATS_INC_REAPED(best_cachep);
+
+ /* Safe to drop the lock. The slab is no longer linked to the
+ * cache.
+ */
+ spin_unlock_irq(&best_cachep->spinlock);
+ kmem_slab_destroy(best_cachep, slabp);
+ spin_lock_irq(&best_cachep->spinlock);
+ }
+ spin_unlock_irq(&best_cachep->spinlock);
+ ret = scan * (1 << best_cachep->gfporder);
+out:
+ up(&cache_chain_sem);
+ return ret;
+}
+
+void dump_slabinfo()
+{
+ struct list_head *p;
+ unsigned long spin_flags;
+
+ /* Output format version, so at least we can change it without _too_
+ * many complaints.
+ */
+ printk( "slabinfo - version: 1.1"
+#if STATS
+ " (statistics)"
+#endif
+#ifdef CONFIG_SMP
+ " (SMP)"
+#endif
+ "\n");
+ down(&cache_chain_sem);
+ p = &cache_cache.next;
+ do {
+ kmem_cache_t *cachep;
+ struct list_head *q;
+ slab_t *slabp;
+ unsigned long active_objs;
+ unsigned long num_objs;
+ unsigned long active_slabs = 0;
+ unsigned long num_slabs;
+ cachep = list_entry(p, kmem_cache_t, next);
+
+ spin_lock_irq(&cachep->spinlock);
+ active_objs = 0;
+ num_slabs = 0;
+ list_for_each(q,&cachep->slabs_full) {
+ slabp = list_entry(q, slab_t, list);
+ if (slabp->inuse != cachep->num)
+ BUG();
+ active_objs += cachep->num;
+ active_slabs++;
+ }
+ list_for_each(q,&cachep->slabs_partial) {
+ slabp = list_entry(q, slab_t, list);
+ if (slabp->inuse == cachep->num || !slabp->inuse)
+ BUG();
+ active_objs += slabp->inuse;
+ active_slabs++;
+ }
+ list_for_each(q,&cachep->slabs_free) {
+ slabp = list_entry(q, slab_t, list);
+ if (slabp->inuse)
+ BUG();
+ num_slabs++;
+ }
+ num_slabs+=active_slabs;
+ num_objs = num_slabs*cachep->num;
+
+ printk("%-17s %6lu %6lu %6u %4lu %4lu %4u",
+ cachep->name, active_objs, num_objs, cachep->objsize,
+ active_slabs, num_slabs, (1<<cachep->gfporder));
+
+#if STATS
+ {
+ unsigned long errors = cachep->errors;
+ unsigned long high = cachep->high_mark;
+ unsigned long grown = cachep->grown;
+ unsigned long reaped = cachep->reaped;
+ unsigned long allocs = cachep->num_allocations;
+
+ printk(" : %6lu %7lu %5lu %4lu %4lu",
+ high, allocs, grown, reaped, errors);
+ }
+#endif
+#ifdef CONFIG_SMP
+ {
+ unsigned int batchcount = cachep->batchcount;
+ unsigned int limit;
+
+ if (cc_data(cachep))
+ limit = cc_data(cachep)->limit;
+ else
+ limit = 0;
+ printk(" : %4u %4u",
+ limit, batchcount);
+ }
+#endif
+#if STATS && defined(CONFIG_SMP)
+ {
+ unsigned long allochit = atomic_read(&cachep->allochit);
+ unsigned long allocmiss = atomic_read(&cachep->allocmiss);
+ unsigned long freehit = atomic_read(&cachep->freehit);
+ unsigned long freemiss = atomic_read(&cachep->freemiss);
+ printk(" : %6lu %6lu %6lu %6lu",
+ allochit, allocmiss, freehit, freemiss);
+ }
+#endif
+ printk("\n");
+ spin_unlock_irq(&cachep->spinlock);
+
+ p = cachep->next.next;
+ } while (p != &cache_cache.next);
+
+ up(&cache_chain_sem);
+
+ return;
+}
+
+
+
diff --git a/xen/common/softirq.c b/xen/common/softirq.c
new file mode 100644
index 0000000000..b98c47f3ce
--- /dev/null
+++ b/xen/common/softirq.c
@@ -0,0 +1,332 @@
+/*
+ * linux/kernel/softirq.c
+ *
+ * Copyright (C) 1992 Linus Torvalds
+ *
+ * Fixed a disable_bh()/enable_bh() race (was causing a console lockup)
+ * due to non-atomic handling of bh_mask_count. Copyright (C) 1998 Andrea Arcangeli
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+//#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+//#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/tqueue.h>
+
+/*
+ - No shared variables, all the data are CPU local.
+ - If a softirq needs serialization, let it serialize itself
+ by its own spinlocks.
+   - Even if a softirq is serialized, only the local cpu is marked for
+     execution. Hence, we get something of a weak cpu binding. It is
+     still not clear whether this results in better locality or not.
+   - These softirqs are not masked by global cli() and start_bh_atomic()
+     (for obvious reasons). Hence, old parts of code still using global
+     locks MUST NOT use softirqs, but must insert interfacing routines
+     that acquire the global locks. E.g. look at the BHs implementation.
+
+ Examples:
+ - NET RX softirq. It is multithreaded and does not require
+ any global serialization.
+ - NET TX softirq. It kicks software netdevice queues, hence
+ it is logically serialized per device, but this serialization
+ is invisible to common code.
+   - Tasklets: serialized with respect to themselves.
+ - Bottom halves: globally serialized, grr...
+ */
+
+irq_cpustat_t irq_stat[NR_CPUS];
+
+static struct softirq_action softirq_vec[32] __cacheline_aligned;
+
+
+asmlinkage void do_softirq()
+{
+ int cpu = smp_processor_id();
+ __u32 pending;
+ long flags;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ pending = softirq_pending(cpu);
+
+ while (pending) {
+ struct softirq_action *h;
+
+ local_bh_disable();
+restart:
+ /* Reset the pending bitmask before enabling irqs */
+ softirq_pending(cpu) = 0;
+
+ local_irq_enable();
+
+ h = softirq_vec;
+
+ do {
+ if (pending & 1)
+ h->action(h);
+ h++;
+ pending >>= 1;
+ } while (pending);
+
+ local_irq_disable();
+
+ pending = softirq_pending(cpu);
+ if (pending) goto restart;
+ __local_bh_enable();
+ }
+
+ local_irq_restore(flags);
+}
+
+/*
+ * This function must run with irq disabled!
+ */
+inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
+{
+ __cpu_raise_softirq(cpu, nr);
+
+#ifdef CONFIG_SMP
+ if ( cpu != smp_processor_id() )
+ smp_send_event_check_cpu(cpu);
+#endif
+}
+
+void raise_softirq(unsigned int nr)
+{
+ long flags;
+
+ local_irq_save(flags);
+ cpu_raise_softirq(smp_processor_id(), nr);
+ local_irq_restore(flags);
+}
+
+void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+{
+ softirq_vec[nr].data = data;
+ softirq_vec[nr].action = action;
+}
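+
+/*
+ * Usage sketch (MY_SOFTIRQ and my_action are hypothetical names, not
+ * defined in this tree):
+ *
+ *	static void my_action(struct softirq_action *h)
+ *	{
+ *		... runs with irqs enabled, once per raise per cpu ...
+ *	}
+ *
+ *	open_softirq(MY_SOFTIRQ, my_action, NULL);
+ *	raise_softirq(MY_SOFTIRQ);	(or cpu_raise_softirq() if
+ *					 irqs are already disabled)
+ */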
+
+
+/* Tasklets */
+
+struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned;
+struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned;
+
+void __tasklet_schedule(struct tasklet_struct *t)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ local_irq_save(flags);
+ t->next = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = t;
+ cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+ local_irq_restore(flags);
+}
+
+void __tasklet_hi_schedule(struct tasklet_struct *t)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ local_irq_save(flags);
+ t->next = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = t;
+ cpu_raise_softirq(cpu, HI_SOFTIRQ);
+ local_irq_restore(flags);
+}
+
+static void tasklet_action(struct softirq_action *a)
+{
+ int cpu = smp_processor_id();
+ struct tasklet_struct *list;
+
+ local_irq_disable();
+ list = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = NULL;
+ local_irq_enable();
+
+ while (list) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+ if (tasklet_trylock(t)) {
+ if (!atomic_read(&t->count)) {
+ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+ BUG();
+ t->func(t->data);
+ }
+ tasklet_unlock(t);
+ continue;
+ }
+
+ local_irq_disable();
+ t->next = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = t;
+ __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+ local_irq_enable();
+ }
+}
+
+static void tasklet_hi_action(struct softirq_action *a)
+{
+ int cpu = smp_processor_id();
+ struct tasklet_struct *list;
+
+ local_irq_disable();
+ list = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = NULL;
+ local_irq_enable();
+
+ while (list) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+ if (tasklet_trylock(t)) {
+ if (!atomic_read(&t->count)) {
+ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+ BUG();
+ t->func(t->data);
+ }
+ tasklet_unlock(t);
+ continue;
+ }
+
+ local_irq_disable();
+ t->next = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = t;
+ __cpu_raise_softirq(cpu, HI_SOFTIRQ);
+ local_irq_enable();
+ }
+}
+
+
+void tasklet_init(struct tasklet_struct *t,
+ void (*func)(unsigned long), unsigned long data)
+{
+ t->next = NULL;
+ t->state = 0;
+ atomic_set(&t->count, 0);
+ t->func = func;
+ t->data = data;
+}
+
+void tasklet_kill(struct tasklet_struct *t)
+{
+ if (in_interrupt())
+ printk("Attempt to kill tasklet from interrupt\n");
+
+ while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+ set_current_state(TASK_RUNNING);
+ do {
+ current->policy |= SCHED_YIELD;
+ schedule();
+ } while (test_bit(TASKLET_STATE_SCHED, &t->state));
+ }
+ tasklet_unlock_wait(t);
+ clear_bit(TASKLET_STATE_SCHED, &t->state);
+}
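+
+/*
+ * Usage sketch (names are illustrative; tasklet_schedule() is assumed
+ * to be the usual inline wrapper around __tasklet_schedule() from the
+ * header):
+ *
+ *	static void my_handler(unsigned long data);
+ *	static struct tasklet_struct my_tasklet;
+ *
+ *	tasklet_init(&my_tasklet, my_handler, 0);
+ *	tasklet_schedule(&my_tasklet);	e.g. from irq context
+ *	...
+ *	tasklet_kill(&my_tasklet);	before freeing my_tasklet
+ */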
+
+
+
+/* Old style BHs */
+
+static void (*bh_base[32])(void);
+struct tasklet_struct bh_task_vec[32];
+
+/* BHs are serialized by spinlock global_bh_lock.
+
+   It would still be possible to implement synchronize_bh() as
+   spin_unlock_wait(&global_bh_lock). This operation is not used
+   by the kernel now, so the lock is kept non-private only
+   because of wait_on_irq().
+
+ It can be removed only after auditing all the BHs.
+ */
+spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED;
+
+static void bh_action(unsigned long nr)
+{
+ int cpu = smp_processor_id();
+
+ if (!spin_trylock(&global_bh_lock))
+ goto resched;
+
+ if (!hardirq_trylock(cpu))
+ goto resched_unlock;
+
+ if (bh_base[nr])
+ bh_base[nr]();
+
+ hardirq_endlock(cpu);
+ spin_unlock(&global_bh_lock);
+ return;
+
+resched_unlock:
+ spin_unlock(&global_bh_lock);
+resched:
+ mark_bh(nr);
+}
+
+void init_bh(int nr, void (*routine)(void))
+{
+ bh_base[nr] = routine;
+ mb();
+}
+
+void remove_bh(int nr)
+{
+ tasklet_kill(bh_task_vec+nr);
+ bh_base[nr] = NULL;
+}
+
+void __init softirq_init()
+{
+ int i;
+
+ for (i=0; i<32; i++)
+ tasklet_init(bh_task_vec+i, bh_action, i);
+
+ open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
+ open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+}
+
+void __run_task_queue(task_queue *list)
+{
+ struct list_head head, *next;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tqueue_lock, flags);
+ list_add(&head, list);
+ list_del_init(list);
+ spin_unlock_irqrestore(&tqueue_lock, flags);
+
+ next = head.next;
+ while (next != &head) {
+ void (*f) (void *);
+ struct tq_struct *p;
+ void *data;
+
+ p = list_entry(next, struct tq_struct, list);
+ next = next->next;
+ f = p->routine;
+ data = p->data;
+ wmb();
+ p->sync = 0;
+ if (f)
+ f(data);
+ }
+}
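+
+/*
+ * Usage sketch (queue_task() and the tq_struct initialiser are assumed
+ * to come from <linux/tqueue.h>):
+ *
+ *	static void my_routine(void *data);
+ *	static struct tq_struct my_task = { routine: my_routine };
+ *
+ *	queue_task(&my_task, &tq_timer);	deferred to tqueue_bh()
+ */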
+
diff --git a/xen/common/timer.c b/xen/common/timer.c
new file mode 100644
index 0000000000..20d45ccbe6
--- /dev/null
+++ b/xen/common/timer.c
@@ -0,0 +1,603 @@
+/*
+ * linux/kernel/timer.c
+ *
+ * Kernel internal timers, kernel timekeeping, basic process system calls
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
+ *
+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
+ * "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ * serialize accesses to xtime/lost_ticks).
+ * Copyright (C) 1998 Andrea Arcangeli
+ * 1999-03-10 Improved NTP compatibility by Ulrich Windl
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/timex.h>
+#include <linux/tqueue.h>
+#include <linux/delay.h>
+//#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+//#include <linux/kernel_stat.h>
+
+#include <xeno/event.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Timekeeping variables
+ */
+
+long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
+
+/* The current time */
+struct timeval xtime __attribute__ ((aligned (16)));
+
+/* Don't completely fail for HZ > 500. */
+int tickadj = 500/HZ ? : 1; /* microsecs */
+
+DECLARE_TASK_QUEUE(tq_timer);
+DECLARE_TASK_QUEUE(tq_immediate);
+
+/*
+ * phase-lock loop variables
+ */
+/* TIME_ERROR prevents overwriting the CMOS clock */
+int time_state = TIME_OK; /* clock synchronization status */
+int time_status = STA_UNSYNC; /* clock status bits */
+long time_offset; /* time adjustment (us) */
+long time_constant = 2; /* pll time constant */
+long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
+long time_precision = 1; /* clock precision (us) */
+long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
+long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
+long time_phase; /* phase offset (scaled us) */
+long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
+ /* frequency offset (scaled ppm)*/
+long time_adj; /* tick adjust (scaled 1 / HZ) */
+long time_reftime; /* time at last adjustment (s) */
+
+long time_adjust;
+long time_adjust_step;
+
+unsigned long event;
+
+unsigned long volatile jiffies;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
+
+/*
+ * Event timer code
+ */
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+struct timer_vec {
+ int index;
+ struct list_head vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+ int index;
+ struct list_head vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5;
+static struct timer_vec tv4;
+static struct timer_vec tv3;
+static struct timer_vec tv2;
+static struct timer_vec_root tv1;
+
+static struct timer_vec * const tvecs[] = {
+ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
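+
+/*
+ * The vectors form a hierarchical timer wheel: tv1 resolves the next
+ * TVR_SIZE (256) jiffies directly; tv2 covers up to 2^(8+6), tv3 up to
+ * 2^(8+12), and so on. Whenever tv1 wraps around, one slot of tv2 is
+ * cascaded down into it, etc. For example, a timer due in 1000 jiffies
+ * lands in tv2 at index (expires >> 8) & 63, and migrates into tv1 on
+ * a later cascade (see internal_add_timer() and cascade_timers()).
+ */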
+
+void init_timervecs (void)
+{
+ int i;
+
+ for (i = 0; i < TVN_SIZE; i++) {
+ INIT_LIST_HEAD(tv5.vec + i);
+ INIT_LIST_HEAD(tv4.vec + i);
+ INIT_LIST_HEAD(tv3.vec + i);
+ INIT_LIST_HEAD(tv2.vec + i);
+ }
+ for (i = 0; i < TVR_SIZE; i++)
+ INIT_LIST_HEAD(tv1.vec + i);
+}
+
+static unsigned long timer_jiffies;
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+ /*
+	 * interrupts must be disabled (cli) when calling this
+ */
+ unsigned long expires = timer->expires;
+ unsigned long idx = expires - timer_jiffies;
+ struct list_head * vec;
+
+ if (idx < TVR_SIZE) {
+ int i = expires & TVR_MASK;
+ vec = tv1.vec + i;
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ int i = (expires >> TVR_BITS) & TVN_MASK;
+ vec = tv2.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ vec = tv3.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+ vec = tv4.vec + i;
+ } else if ((signed long) idx < 0) {
+ /* can happen if you add a timer with expires == jiffies,
+ * or you set a timer to go off in the past
+ */
+ vec = tv1.vec + tv1.index;
+ } else if (idx <= 0xffffffffUL) {
+ int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+ vec = tv5.vec + i;
+ } else {
+ /* Can only get here on architectures with 64-bit jiffies */
+ INIT_LIST_HEAD(&timer->list);
+ return;
+ }
+ /*
+ * Timers are FIFO!
+ */
+ list_add(&timer->list, vec->prev);
+}
+
+/* Initialize both explicitly - let's try to have them in the same cache line */
+spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_SMP
+volatile struct timer_list * volatile running_timer;
+#define timer_enter(t) do { running_timer = t; mb(); } while (0)
+#define timer_exit() do { running_timer = NULL; } while (0)
+#define timer_is_running(t) (running_timer == t)
+#define timer_synchronize(t) while (timer_is_running(t)) barrier()
+#else
+#define timer_enter(t) do { } while (0)
+#define timer_exit() do { } while (0)
+#endif
+
+void add_timer(struct timer_list *timer)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ if (timer_pending(timer))
+ goto bug;
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return;
+bug:
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ printk("bug: kernel timer added twice at %p.\n",
+ __builtin_return_address(0));
+}
+
+static inline int detach_timer (struct timer_list *timer)
+{
+ if (!timer_pending(timer))
+ return 0;
+ list_del(&timer->list);
+ return 1;
+}
+
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ timer->expires = expires;
+ ret = detach_timer(timer);
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
+int del_timer(struct timer_list * timer)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret = detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
+#ifdef CONFIG_SMP
+void sync_timers(void)
+{
+ spin_unlock_wait(&global_bh_lock);
+}
+
+/*
+ * SMP-specific function to delete a periodic timer.
+ * The caller must somehow prevent the timer from being restarted.
+ * Upon exit the timer is not queued and its handler is not running
+ * on any CPU. Returns the number of times the timer was deleted
+ * (for reference counting).
+ */
+
+int del_timer_sync(struct timer_list * timer)
+{
+ int ret = 0;
+
+ for (;;) {
+ unsigned long flags;
+ int running;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret += detach_timer(timer);
+ timer->list.next = timer->list.prev = 0;
+ running = timer_is_running(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+
+ if (!running)
+ break;
+
+ timer_synchronize(timer);
+ }
+
+ return ret;
+}
+#endif
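+
+/*
+ * Usage sketch (illustrative; "dev" and its fields are hypothetical):
+ * a driver tearing down a self-rearming periodic timer must first stop
+ * the rearm path, then synchronise:
+ *
+ *     dev->shutting_down = 1;   (checked by the handler before mod_timer)
+ *     del_timer_sync(&dev->poll_timer);
+ */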
+
+
+static inline void cascade_timers(struct timer_vec *tv)
+{
+ /* cascade all the timers from tv up one level */
+ struct list_head *head, *curr, *next;
+
+ head = tv->vec + tv->index;
+ curr = head->next;
+ /*
+ * We are removing _all_ timers from the list, so we don't have to
+ * detach them individually, just clear the list afterwards.
+ */
+ while (curr != head) {
+ struct timer_list *tmp;
+
+ tmp = list_entry(curr, struct timer_list, list);
+ next = curr->next;
+ list_del(curr); /* not strictly needed: the whole list is reinitialised below */
+ internal_add_timer(tmp);
+ curr = next;
+ }
+ INIT_LIST_HEAD(head);
+ tv->index = (tv->index + 1) & TVN_MASK;
+}
+
+static inline void run_timer_list(void)
+{
+ spin_lock_irq(&timerlist_lock);
+ while ((long)(jiffies - timer_jiffies) >= 0) {
+ struct list_head *head, *curr;
+ if (!tv1.index) {
+ int n = 1;
+ do {
+ cascade_timers(tvecs[n]);
+ } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+ }
+repeat:
+ head = tv1.vec + tv1.index;
+ curr = head->next;
+ if (curr != head) {
+ struct timer_list *timer;
+ void (*fn)(unsigned long);
+ unsigned long data;
+
+ timer = list_entry(curr, struct timer_list, list);
+ fn = timer->function;
+ data = timer->data;
+
+ detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ timer_enter(timer);
+ spin_unlock_irq(&timerlist_lock);
+ fn(data);
+ spin_lock_irq(&timerlist_lock);
+ timer_exit();
+ goto repeat;
+ }
+ ++timer_jiffies;
+ tv1.index = (tv1.index + 1) & TVR_MASK;
+ }
+ spin_unlock_irq(&timerlist_lock);
+}
+
+spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
+
+void tqueue_bh(void)
+{
+ run_task_queue(&tq_timer);
+}
+
+void immediate_bh(void)
+{
+ run_task_queue(&tq_immediate);
+}
+
+/*
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ */
+static void second_overflow(void)
+{
+ long ltemp;
+
+ /* Bump the maxerror field */
+ time_maxerror += time_tolerance >> SHIFT_USEC;
+ if ( time_maxerror > NTP_PHASE_LIMIT ) {
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_status |= STA_UNSYNC;
+ }
+
+ /*
+ * Leap second processing. If in leap-insert state at
+ * the end of the day, the system clock is set back one
+ * second; if in leap-delete state, the system clock is
+ * set ahead one second. The microtime() routine or
+ * external clock driver will insure that reported time
+ * is always monotonic. The ugly divides should be
+ * replaced.
+ */
+ switch (time_state) {
+
+ case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
+ break;
+
+ case TIME_INS:
+ if (xtime.tv_sec % 86400 == 0) {
+ xtime.tv_sec--;
+ time_state = TIME_OOP;
+ printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
+ }
+ break;
+
+ case TIME_DEL:
+ if ((xtime.tv_sec + 1) % 86400 == 0) {
+ xtime.tv_sec++;
+ time_state = TIME_WAIT;
+ printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
+ }
+ break;
+
+ case TIME_OOP:
+ time_state = TIME_WAIT;
+ break;
+
+ case TIME_WAIT:
+ if (!(time_status & (STA_INS | STA_DEL)))
+ time_state = TIME_OK;
+ }
+
+ /*
+ * Compute the phase adjustment for the next second. In
+ * PLL mode, the offset is reduced by a fixed factor
+ * times the time constant. In FLL mode the offset is
+ * used directly. In either mode, the maximum phase
+ * adjustment for each second is clamped so as to spread
+ * the adjustment over not more than the number of
+ * seconds between updates.
+ */
+ if (time_offset < 0) {
+ ltemp = -time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset += ltemp;
+ time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ } else {
+ ltemp = time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset -= ltemp;
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ }
+
+ if (ltemp < 0)
+ time_adj -= -ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+ else
+ time_adj += ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+
+#if HZ == 100
+ /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
+ * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
+ */
+ if (time_adj < 0)
+ time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
+ else
+ time_adj += (time_adj >> 2) + (time_adj >> 5);
+#endif
+}
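+
+/*
+ * Illustrative numbers: in PLL mode with the usual timex constants
+ * (SHIFT_KG == 6, default time_constant == 2), each second moves about
+ * 1/256 of the remaining time_offset into time_adj, so the offset decays
+ * geometrically; the clamp above limits any single second's correction
+ * to MAXPHASE/MINSEC (512000/16 == 32000 usec, suitably scaled).
+ */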
+
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+ if ( (time_adjust_step = time_adjust) != 0 ) {
+ /* We are doing an adjtime thing.
+ *
+ * Prepare time_adjust_step to be within bounds.
+ * Note that a positive time_adjust means we want the clock
+ * to run faster.
+ *
+ * Limit the amount of the step to be in the range
+ * -tickadj .. +tickadj
+ */
+ if (time_adjust > tickadj)
+ time_adjust_step = tickadj;
+ else if (time_adjust < -tickadj)
+ time_adjust_step = -tickadj;
+
+ /* Reduce by this step the amount of time left */
+ time_adjust -= time_adjust_step;
+ }
+ xtime.tv_usec += tick + time_adjust_step;
+ /*
+ * Advance the phase; once it accumulates to a whole microsecond,
+ * fold that microsecond into the tick.
+ */
+ time_phase += time_adj;
+ if (time_phase <= -FINEUSEC) {
+ long ltemp = -time_phase >> SHIFT_SCALE;
+ time_phase += ltemp << SHIFT_SCALE;
+ xtime.tv_usec -= ltemp;
+ }
+ else if (time_phase >= FINEUSEC) {
+ long ltemp = time_phase >> SHIFT_SCALE;
+ time_phase -= ltemp << SHIFT_SCALE;
+ xtime.tv_usec += ltemp;
+ }
+}
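+
+/*
+ * Illustrative numbers: with HZ == 100 (tick == 10000 usec) and the
+ * conventional tickadj of 5 usec, an adjtime() request of +800 usec is
+ * consumed 5 usec per tick, i.e. spread over 160 ticks (1.6s) of
+ * slightly lengthened ticks rather than applied as one visible step.
+ */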
+
+/*
+ * Using a loop looks inefficient, but "ticks" is
+ * usually just one (we shouldn't be losing ticks;
+ * we're doing it this way mainly for interrupt
+ * latency reasons, not because we expect to
+ * have lots of lost timer ticks).
+ */
+static void update_wall_time(unsigned long ticks)
+{
+ do {
+ ticks--;
+ update_wall_time_one_tick();
+ } while (ticks);
+
+ if (xtime.tv_usec >= 1000000) {
+ xtime.tv_usec -= 1000000;
+ xtime.tv_sec++;
+ second_overflow();
+ }
+}
+
+static inline void do_process_times(struct task_struct *p,
+ unsigned long user, unsigned long system)
+{
+ //unsigned long psecs;
+
+// psecs = (p->times.tms_utime += user);
+ //psecs += (p->times.tms_stime += system);
+}
+
+
+void update_one_process(struct task_struct *p, unsigned long user,
+ unsigned long system, int cpu)
+{
+// p->per_cpu_utime[cpu] += user;
+// p->per_cpu_stime[cpu] += system;
+ do_process_times(p, user, system);
+}
+
+/*
+ * Called from the timer interrupt handler to charge one tick to the current
+ * process. user_tick is 1 if the tick is user time, 0 for system.
+ */
+void update_process_times(int user_tick)
+{
+ struct task_struct *p = current;
+ int cpu = smp_processor_id(), system = user_tick ^ 1;
+
+ update_one_process(p, user_tick, system, cpu);
+
+ if ( --p->counter <= 0 )
+ {
+ p->counter = 0;
+ set_bit(_HYP_EVENT_NEED_RESCHED, &p->hyp_events);
+ }
+}
+
+
+/* jiffies at the most recent update of wall time */
+unsigned long wall_jiffies;
+
+/*
+ * This spinlock protects us from races in SMP while playing with xtime. -arca
+ */
+rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+
+static inline void update_times(void)
+{
+ unsigned long ticks;
+
+ /*
+ * update_times() is run from the raw timer_bh handler so we
+ * just know that the irqs are locally enabled and so we don't
+ * need to save/restore the flags of the local CPU here. -arca
+ */
+ write_lock_irq(&xtime_lock);
+
+ ticks = jiffies - wall_jiffies;
+ if (ticks) {
+ wall_jiffies += ticks;
+ update_wall_time(ticks);
+ }
+ write_unlock_irq(&xtime_lock);
+}
+
+void timer_bh(void)
+{
+ update_times();
+ run_timer_list();
+}
+
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/config.h>
+#include <xeno/smp.h>
+#include <xeno/irq.h>
+#include <asm/msr.h>
+
+void do_timer(struct pt_regs *regs)
+{
+
+ (*(unsigned long *)&jiffies)++;
+
+ if ( !using_apic_timer )
+ update_process_times(user_mode(regs));
+
+ mark_bh(TIMER_BH);
+ if (TQ_ACTIVE(tq_timer))
+ mark_bh(TQUEUE_BH);
+}
+
+void get_fast_time(struct timeval * tm)
+{
+ *tm=xtime;
+}
diff --git a/xen/common/vsprintf.c b/xen/common/vsprintf.c
new file mode 100644
index 0000000000..fe17225088
--- /dev/null
+++ b/xen/common/vsprintf.c
@@ -0,0 +1,713 @@
+/*
+ * linux/lib/vsprintf.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */
+/*
+ * Wirzenius wrote this portably, Torvalds fucked it up :-)
+ */
+
+/*
+ * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
+ * - changed to provide snprintf and vsnprintf functions
+ */
+
+#include <stdarg.h>
+#include <xeno/ctype.h>
+#include <xeno/lib.h>
+
+/**
+ * simple_strtoul - convert a string to an unsigned long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
+{
+ unsigned long result = 0,value;
+
+ if (!base) {
+ base = 10;
+ if (*cp == '0') {
+ base = 8;
+ cp++;
+ if ((*cp == 'x') && isxdigit(cp[1])) {
+ cp++;
+ base = 16;
+ }
+ }
+ }
+ while (isxdigit(*cp) &&
+ (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
+ result = result*base + value;
+ cp++;
+ }
+ if (endp)
+ *endp = (char *)cp;
+ return result;
+}
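+
+/*
+ * Illustrative usage: with base == 0 the prefix selects the radix, so
+ *
+ *     char *end;
+ *     unsigned long v = simple_strtoul("0x1a90", &end, 0);
+ *
+ * yields v == 6800 with end left pointing at the terminating NUL,
+ * while "0755" would parse as octal (493) and "42" as decimal.
+ */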
+
+/**
+ * simple_strtol - convert a string to a signed long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long simple_strtol(const char *cp,char **endp,unsigned int base)
+{
+ if(*cp=='-')
+ return -simple_strtoul(cp+1,endp,base);
+ return simple_strtoul(cp,endp,base);
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base)
+{
+ unsigned long long result = 0,value;
+
+ if (!base) {
+ base = 10;
+ if (*cp == '0') {
+ base = 8;
+ cp++;
+ if ((*cp == 'x') && isxdigit(cp[1])) {
+ cp++;
+ base = 16;
+ }
+ }
+ }
+ while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
+ ? toupper(*cp) : *cp)-'A'+10) < base) {
+ result = result*base + value;
+ cp++;
+ }
+ if (endp)
+ *endp = (char *)cp;
+ return result;
+}
+
+/**
+ * simple_strtoll - convert a string to a signed long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long long simple_strtoll(const char *cp,char **endp,unsigned int base)
+{
+ if(*cp=='-')
+ return -simple_strtoull(cp+1,endp,base);
+ return simple_strtoull(cp,endp,base);
+}
+
+static int skip_atoi(const char **s)
+{
+ int i=0;
+
+ while (isdigit(**s))
+ i = i*10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SPECIAL 32 /* 0x */
+#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * buf, char * end, long num, int base, int size, int precision, int type)
+{
+ char c,sign,tmp[66];
+ const char *digits;
+ const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+ const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ int i;
+
+ digits = (type & LARGE) ? large_digits : small_digits;
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 36)
+ return buf;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++]='0';
+ else
+ {
+ /* XXX KAF: force unsigned mod and div. */
+ unsigned long num2=(unsigned long)num;
+ unsigned int base2=(unsigned int)base;
+ while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; }
+ }
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type&(ZEROPAD+LEFT))) {
+ while(size-->0) {
+ if (buf <= end)
+ *buf = ' ';
+ ++buf;
+ }
+ }
+ if (sign) {
+ if (buf <= end)
+ *buf = sign;
+ ++buf;
+ }
+ if (type & SPECIAL) {
+ if (base==8) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ } else if (base==16) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ if (buf <= end)
+ *buf = digits[33];
+ ++buf;
+ }
+ }
+ if (!(type & LEFT)) {
+ while (size-- > 0) {
+ if (buf <= end)
+ *buf = c;
+ ++buf;
+ }
+ }
+ while (i < precision--) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ }
+ while (i-- > 0) {
+ if (buf <= end)
+ *buf = tmp[i];
+ ++buf;
+ }
+ while (size-- > 0) {
+ if (buf <= end)
+ *buf = ' ';
+ ++buf;
+ }
+ return buf;
+}
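+
+/*
+ * Illustrative behaviour: number(buf, end, 255, 16, 8, -1,
+ * SPECIAL | ZEROPAD) emits "0x0000ff", as a "%#08x" conversion would:
+ * the "0x" prefix consumes two of the eight columns, ZEROPAD fills
+ * with '0' rather than spaces, and the digits are built
+ * least-significant-first in tmp[] and copied out in reverse.
+ */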
+
+/**
+ * vsnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want snprintf instead.
+ */
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long long num;
+ int i, base;
+ char *str, *end, c;
+ const char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+ /* 'z' support added 23/7/1999 S.H. */
+ /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+ str = buf;
+ end = buf + size - 1;
+
+ if (end < buf - 1) {
+ end = ((void *) -1);
+ size = end - buf + 1;
+ }
+
+ for (; *fmt ; ++fmt) {
+ if (*fmt != '%') {
+ if (str <= end)
+ *str = *fmt;
+ ++str;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-': flags |= LEFT; goto repeat;
+ case '+': flags |= PLUS; goto repeat;
+ case ' ': flags |= SPACE; goto repeat;
+ case '#': flags |= SPECIAL; goto repeat;
+ case '0': flags |= ZEROPAD; goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+ qualifier = *fmt;
+ ++fmt;
+ if (qualifier == 'l' && *fmt == 'l') {
+ qualifier = 'L';
+ ++fmt;
+ }
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT)) {
+ while (--field_width > 0) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ c = (unsigned char) va_arg(args, int);
+ if (str <= end)
+ *str = c;
+ ++str;
+ while (--field_width > 0) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ if (!s)
+ s = "<NULL>";
+
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT)) {
+ while (len < field_width--) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ for (i = 0; i < len; ++i) {
+ if (str <= end)
+ *str = *s;
+ ++str; ++s;
+ }
+ while (len < field_width--) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2*sizeof(void *);
+ flags |= ZEROPAD;
+ }
+ str = number(str, end,
+ (unsigned long) va_arg(args, void *),
+ 16, field_width, precision, flags);
+ continue;
+
+
+ case 'n':
+ /* FIXME:
+ * What does C99 say about the overflow case here? */
+ if (qualifier == 'l') {
+ long * ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else if (qualifier == 'Z') {
+ size_t * ip = va_arg(args, size_t *);
+ *ip = (str - buf);
+ } else {
+ int * ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ if (str <= end)
+ *str = '%';
+ ++str;
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'X':
+ flags |= LARGE;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ if (str <= end)
+ *str = '%';
+ ++str;
+ if (*fmt) {
+ if (str <= end)
+ *str = *fmt;
+ ++str;
+ } else {
+ --fmt;
+ }
+ continue;
+ }
+ if (qualifier == 'L')
+ num = va_arg(args, long long);
+ else if (qualifier == 'l') {
+ num = va_arg(args, unsigned long);
+ if (flags & SIGN)
+ num = (signed long) num;
+ } else if (qualifier == 'Z') {
+ num = va_arg(args, size_t);
+ } else if (qualifier == 'h') {
+ num = (unsigned short) va_arg(args, int);
+ if (flags & SIGN)
+ num = (signed short) num;
+ } else {
+ num = va_arg(args, unsigned int);
+ if (flags & SIGN)
+ num = (signed int) num;
+ }
+
+ str = number(str, end, num, base,
+ field_width, precision, flags);
+ }
+ if (str <= end)
+ *str = '\0';
+ else if (size > 0)
+ /* don't write out a null byte if the buf size is zero */
+ *end = '\0';
+ /* the trailing null byte doesn't count towards the total
+ * ++str;
+ */
+ return str-buf;
+}
+
+/**
+ * snprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ */
+int snprintf(char * buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i=vsnprintf(buf,size,fmt,args);
+ va_end(args);
+ return i;
+}
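+
+/*
+ * Illustrative usage: the return value counts the characters the full
+ * expansion needed, so callers can detect truncation:
+ *
+ *     char buf[8];
+ *     int n = snprintf(buf, sizeof(buf), "dom%d: %s", 3, "running");
+ *
+ * leaves buf holding "dom3: r" plus the NUL and n == 13; n >= sizeof(buf)
+ * signals that the output was truncated.
+ */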
+
+/**
+ * vsprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want sprintf instead.
+ */
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args);
+}
+
+
+/**
+ * sprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ */
+int sprintf(char * buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i=vsprintf(buf,fmt,args);
+ va_end(args);
+ return i;
+}
+
+/**
+ * vsscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: format of buffer
+ * @args: arguments
+ */
+int vsscanf(const char * buf, const char * fmt, va_list args)
+{
+ const char *str = buf;
+ char *next;
+ int num = 0;
+ int qualifier;
+ int base;
+ int field_width = -1;
+ int is_sign = 0;
+
+ while(*fmt && *str) {
+ /* skip any white space in format */
+ /* white space in format matches any amount of
+ * white space, including none, in the input.
+ */
+ if (isspace(*fmt)) {
+ while (isspace(*fmt))
+ ++fmt;
+ while (isspace(*str))
+ ++str;
+ }
+
+ /* anything that is not a conversion must match exactly */
+ if (*fmt != '%' && *fmt) {
+ if (*fmt++ != *str++)
+ break;
+ continue;
+ }
+
+ if (!*fmt)
+ break;
+ ++fmt;
+
+ /* skip this conversion.
+ * advance both strings to next white space
+ */
+ if (*fmt == '*') {
+ while (!isspace(*fmt) && *fmt)
+ fmt++;
+ while (!isspace(*str) && *str)
+ str++;
+ continue;
+ }
+
+ /* get field width */
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+
+ /* get conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z') {
+ qualifier = *fmt;
+ fmt++;
+ }
+ base = 10;
+ is_sign = 0;
+
+ if (!*fmt || !*str)
+ break;
+
+ switch(*fmt++) {
+ case 'c':
+ {
+ char *s = (char *) va_arg(args,char*);
+ if (field_width == -1)
+ field_width = 1;
+ do {
+ *s++ = *str++;
+ } while (--field_width > 0 && *str); /* copy at most field_width chars */
+ num++;
+ }
+ continue;
+ case 's':
+ {
+ char *s = (char *) va_arg(args, char *);
+ if(field_width == -1)
+ field_width = INT_MAX;
+ /* first, skip leading white space in buffer */
+ while (isspace(*str))
+ str++;
+
+ /* now copy until next white space */
+ while (*str && !isspace(*str) && field_width--) {
+ *s++ = *str++;
+ }
+ *s = '\0';
+ num++;
+ }
+ continue;
+ case 'n':
+ /* return number of characters read so far */
+ {
+ int *i = (int *)va_arg(args,int*);
+ *i = str - buf;
+ }
+ continue;
+ case 'o':
+ base = 8;
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ break;
+ case 'd':
+ case 'i':
+ is_sign = 1;
+ case 'u':
+ break;
+ case '%':
+ /* looking for '%' in str */
+ if (*str++ != '%')
+ return num;
+ continue;
+ default:
+ /* invalid format; stop here */
+ return num;
+ }
+
+ /* have some sort of integer conversion.
+ * first, skip white space in buffer.
+ */
+ while (isspace(*str))
+ str++;
+
+ if (!*str || !isdigit(*str))
+ break;
+
+ switch(qualifier) {
+ case 'h':
+ if (is_sign) {
+ short *s = (short *) va_arg(args,short *);
+ *s = (short) simple_strtol(str,&next,base);
+ } else {
+ unsigned short *s = (unsigned short *) va_arg(args, unsigned short *);
+ *s = (unsigned short) simple_strtoul(str, &next, base);
+ }
+ break;
+ case 'l':
+ if (is_sign) {
+ long *l = (long *) va_arg(args,long *);
+ *l = simple_strtol(str,&next,base);
+ } else {
+ unsigned long *l = (unsigned long*) va_arg(args,unsigned long*);
+ *l = simple_strtoul(str,&next,base);
+ }
+ break;
+ case 'L':
+ if (is_sign) {
+ long long *l = (long long*) va_arg(args,long long *);
+ *l = simple_strtoll(str,&next,base);
+ } else {
+ unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*);
+ *l = simple_strtoull(str,&next,base);
+ }
+ break;
+ case 'Z':
+ {
+ size_t *s = (size_t*) va_arg(args,size_t*);
+ *s = (size_t) simple_strtoul(str,&next,base);
+ }
+ break;
+ default:
+ if (is_sign) {
+ int *i = (int *) va_arg(args, int*);
+ *i = (int) simple_strtol(str,&next,base);
+ } else {
+ unsigned int *i = (unsigned int*) va_arg(args, unsigned int*);
+ *i = (unsigned int) simple_strtoul(str,&next,base);
+ }
+ break;
+ }
+ num++;
+
+ if (!next)
+ break;
+ str = next;
+ }
+ return num;
+}
+
+/**
+ * sscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: formatting of buffer
+ * @...: resulting arguments
+ */
+int sscanf(const char * buf, const char * fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args,fmt);
+ i = vsscanf(buf,fmt,args);
+ va_end(args);
+ return i;
+}
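+
+/*
+ * Illustrative usage (names are hypothetical):
+ *
+ *     const char *line = "major 3 minor 0";
+ *     int maj, min;
+ *     if (sscanf(line, "major %d minor %d", &maj, &min) == 2)
+ *         ...use maj == 3, min == 0...
+ *
+ * Note that the integer conversions above bail out unless the first
+ * non-space input character is a digit, so this vsscanf cannot parse
+ * a leading '-' sign.
+ */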
diff --git a/xen/drivers/Makefile b/xen/drivers/Makefile
new file mode 100644
index 0000000000..4aa76a3f25
--- /dev/null
+++ b/xen/drivers/Makefile
@@ -0,0 +1,16 @@
+
+default:
+ $(MAKE) -C char
+ $(MAKE) -C pci
+ $(MAKE) -C net
+ $(MAKE) -C block
+ $(MAKE) -C ide
+ $(MAKE) -C scsi
+
+clean:
+ $(MAKE) -C char clean
+ $(MAKE) -C pci clean
+ $(MAKE) -C net clean
+ $(MAKE) -C block clean
+ $(MAKE) -C ide clean
+ $(MAKE) -C scsi clean
diff --git a/xen/drivers/block/Makefile b/xen/drivers/block/Makefile
new file mode 100644
index 0000000000..574b7d2d79
--- /dev/null
+++ b/xen/drivers/block/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/block/blkpg.c b/xen/drivers/block/blkpg.c
new file mode 100644
index 0000000000..2e27a1aa2b
--- /dev/null
+++ b/xen/drivers/block/blkpg.c
@@ -0,0 +1,315 @@
+/*
+ * Partition table and disk geometry handling
+ *
+ * This obsoletes the partition-handling code in genhd.c:
+ * Userspace can look at a disk in arbitrary format and tell
+ * the kernel what partitions there are on the disk, and how
+ * these should be numbered.
+ * It also allows one to repartition a disk that is being used.
+ *
+ * A single ioctl with lots of subfunctions:
+ *
+ * Device number stuff:
+ * get_whole_disk() (given the device number of a partition, find
+ * the device number of the encompassing disk)
+ * get_all_partitions() (given the device number of a disk, return the
+ * device numbers of all its known partitions)
+ *
+ * Partition stuff:
+ * add_partition()
+ * delete_partition()
+ * test_partition_in_use() (also for test_disk_in_use)
+ *
+ * Geometry stuff:
+ * get_geometry()
+ * set_geometry()
+ * get_bios_drivedata()
+ *
+ * For today, only the partition stuff - aeb, 990515
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/errno.h>
+/*#include <xeno/fs.h> */ /* for BLKRASET, ... */
+#include <xeno/sched.h> /* for capable() */
+#include <xeno/blk.h> /* for set_device_ro() */
+#include <xeno/blkpg.h>
+#include <xeno/genhd.h>
+/*#include <xeno/swap.h>*/ /* for is_swap_partition() */
+#include <xeno/module.h> /* for EXPORT_SYMBOL */
+
+#include <asm/uaccess.h>
+
+#define is_mounted(_dev) (0)
+#define is_swap_partition(_dev) (0)
+
+#define fsync_dev(_dev) (panic("fsync_dev???"))
+#define invalidate_buffers(_dev) (panic("invalidate_buffers???"))
+
+/*
+ * What is the data describing a partition?
+ *
+ * 1. a device number (kdev_t)
+ * 2. a starting sector and number of sectors (hd_struct)
+ * given in the part[] array of the gendisk structure for the drive.
+ *
+ * The number of sectors is replicated in the sizes[] array of
+ * the gendisk structure for the major, which again is copied to
+ * the blk_size[][] array.
+ * (However, hd_struct has the number of 512-byte sectors,
+ * g->sizes[] and blk_size[][] have the number of 1024-byte blocks.)
+ * Note that several drives may have the same major.
+ */
+
+/*
+ * Add a partition.
+ *
+ * returns: EINVAL: bad parameters
+ * ENXIO: cannot find drive
+ * EBUSY: proposed partition overlaps an existing one
+ * or has the same number as an existing one
+ * 0: all OK.
+ */
+int add_partition(kdev_t dev, struct blkpg_partition *p) {
+ struct gendisk *g;
+ long long ppstart, pplength;
+ long pstart, plength;
+ int i, drive, first_minor, end_minor, minor;
+
+ /* convert bytes to sectors, check for fit in a hd_struct */
+ ppstart = (p->start >> 9);
+ pplength = (p->length >> 9);
+ pstart = ppstart;
+ plength = pplength;
+ if (pstart != ppstart || plength != pplength
+ || pstart < 0 || plength < 0)
+ return -EINVAL;
+
+ /* find the drive major */
+ g = get_gendisk(dev);
+ if (!g)
+ return -ENXIO;
+
+ /* existing drive? */
+ drive = (MINOR(dev) >> g->minor_shift);
+ first_minor = (drive << g->minor_shift);
+ end_minor = first_minor + g->max_p;
+ if (drive >= g->nr_real)
+ return -ENXIO;
+
+ /* drive and partition number OK? */
+ if (first_minor != MINOR(dev) || p->pno <= 0 || p->pno >= g->max_p)
+ return -EINVAL;
+
+ /* partition number in use? */
+ minor = first_minor + p->pno;
+ if (g->part[minor].nr_sects != 0)
+ return -EBUSY;
+
+ /* overlap? */
+ for (i=first_minor+1; i<end_minor; i++)
+ if (!(pstart+plength <= g->part[i].start_sect ||
+ pstart >= g->part[i].start_sect + g->part[i].nr_sects))
+ return -EBUSY;
+
+ /* all seems OK */
+ g->part[minor].start_sect = pstart;
+ g->part[minor].nr_sects = plength;
+ if (g->sizes)
+ g->sizes[minor] = (plength >> (BLOCK_SIZE_BITS - 9));
+#ifdef DEVFS_MUST_DIE
+ devfs_register_partitions (g, first_minor, 0);
+#endif
+ return 0;
+}
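+
+/*
+ * Worked example (illustrative): a blkpg_partition with start == 1048576
+ * and length == 524288 (both in bytes) becomes pstart == 2048,
+ * plength == 1024 sectors after the >> 9 above; the overlap scan then
+ * rejects it with -EBUSY if [2048, 3072) intersects any existing
+ * partition's [start_sect, start_sect + nr_sects) range.
+ */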
+
+/*
+ * Delete a partition given by partition number
+ *
+ * returns: EINVAL: bad parameters
+ * ENXIO: cannot find partition
+ * EBUSY: partition is busy
+ * 0: all OK.
+ *
+ * Note that the dev argument refers to the entire disk, not the partition.
+ */
+int del_partition(kdev_t dev, struct blkpg_partition *p) {
+ struct gendisk *g;
+ kdev_t devp;
+ int drive, first_minor, minor;
+
+ /* find the drive major */
+ g = get_gendisk(dev);
+ if (!g)
+ return -ENXIO;
+
+ /* drive and partition number OK? */
+ drive = (MINOR(dev) >> g->minor_shift);
+ first_minor = (drive << g->minor_shift);
+ if (first_minor != MINOR(dev) || p->pno <= 0 || p->pno >= g->max_p)
+ return -EINVAL;
+
+ /* existing drive and partition? */
+ minor = first_minor + p->pno;
+ if (drive >= g->nr_real || g->part[minor].nr_sects == 0)
+ return -ENXIO;
+
+ /* partition in use? Incomplete check for now. */
+ devp = MKDEV(MAJOR(dev), minor);
+ if (is_mounted(devp) || is_swap_partition(devp))
+ return -EBUSY;
+
+ /* all seems OK */
+ fsync_dev(devp);
+ invalidate_buffers(devp);
+
+ g->part[minor].start_sect = 0;
+ g->part[minor].nr_sects = 0;
+ if (g->sizes)
+ g->sizes[minor] = 0;
+#ifdef DEVFS_MUST_DIE
+ devfs_register_partitions (g, first_minor, 0);
+#endif
+
+ return 0;
+}
+
+int blkpg_ioctl(kdev_t dev, struct blkpg_ioctl_arg *arg)
+{
+ struct blkpg_ioctl_arg a;
+ struct blkpg_partition p;
+ int len;
+
+ if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
+ return -EFAULT;
+
+ switch (a.op) {
+ case BLKPG_ADD_PARTITION:
+ case BLKPG_DEL_PARTITION:
+ len = a.datalen;
+ if (len < sizeof(struct blkpg_partition))
+ return -EINVAL;
+ if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
+ return -EFAULT;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (a.op == BLKPG_ADD_PARTITION)
+ return add_partition(dev, &p);
+ else
+ return del_partition(dev, &p);
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Common ioctl's for block devices
+ */
+
+int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg)
+{
+#if 1
+ printk("May want to check out blk_ioctl...\n");
+ return -EINVAL;
+#else
+ struct gendisk *g;
+ u64 ullval = 0;
+ int intval;
+
+ if (!dev)
+ return -EINVAL;
+
+ switch (cmd) {
+ case BLKROSET:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (get_user(intval, (int *)(arg)))
+ return -EFAULT;
+ set_device_ro(dev, intval);
+ return 0;
+ case BLKROGET:
+ intval = (is_read_only(dev) != 0);
+ return put_user(intval, (int *)(arg));
+
+ case BLKRASET:
+ if(!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if(arg > 0xff)
+ return -EINVAL;
+ read_ahead[MAJOR(dev)] = arg;
+ return 0;
+ case BLKRAGET:
+ if (!arg)
+ return -EINVAL;
+ return put_user(read_ahead[MAJOR(dev)], (long *) arg);
+
+ case BLKFLSBUF:
+ if(!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ fsync_dev(dev);
+ invalidate_buffers(dev);
+ return 0;
+
+ case BLKSSZGET:
+ /* get block device sector size as needed e.g. by fdisk */
+ intval = get_hardsect_size(dev);
+ return put_user(intval, (int *) arg);
+
+ case BLKGETSIZE:
+ case BLKGETSIZE64:
+ g = get_gendisk(dev);
+ if (g)
+ ullval = g->part[MINOR(dev)].nr_sects;
+
+ if (cmd == BLKGETSIZE)
+ return put_user((unsigned long)ullval, (unsigned long *)arg);
+ else
+ return put_user(ullval << 9, (u64 *)arg);
+#if 0
+ case BLKRRPART: /* Re-read partition tables */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ return reread_partitions(dev, 1);
+#endif
+
+ case BLKPG:
+ return blkpg_ioctl(dev, (struct blkpg_ioctl_arg *) arg);
+
+ case BLKELVGET:
+ return blkelvget_ioctl(&blk_get_queue(dev)->elevator,
+ (blkelv_ioctl_arg_t *) arg);
+ case BLKELVSET:
+ return blkelvset_ioctl(&blk_get_queue(dev)->elevator,
+ (blkelv_ioctl_arg_t *) arg);
+
+ case BLKBSZGET:
+ /* get the logical block size (cf. BLKSSZGET) */
+ intval = BLOCK_SIZE;
+ if (blksize_size[MAJOR(dev)])
+ intval = blksize_size[MAJOR(dev)][MINOR(dev)];
+ return put_user (intval, (int *) arg);
+
+ case BLKBSZSET:
+ /* set the logical block size */
+ if (!capable (CAP_SYS_ADMIN))
+ return -EACCES;
+ if (!dev || !arg)
+ return -EINVAL;
+ if (get_user (intval, (int *) arg))
+ return -EFAULT;
+ if (intval > PAGE_SIZE || intval < 512 ||
+ (intval & (intval - 1)))
+ return -EINVAL;
+ if (is_mounted (dev) || is_swap_partition (dev))
+ return -EBUSY;
+ set_blocksize (dev, intval);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+#endif
+}
+
+EXPORT_SYMBOL(blk_ioctl);
diff --git a/xen/drivers/block/elevator.c b/xen/drivers/block/elevator.c
new file mode 100644
index 0000000000..281e8f8b8d
--- /dev/null
+++ b/xen/drivers/block/elevator.c
@@ -0,0 +1,224 @@
+/*
+ * linux/drivers/block/elevator.c
+ *
+ * Block device elevator/IO-scheduler.
+ *
+ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ *
+ * 30042000 Jens Axboe <axboe@suse.de> :
+ *
+ * Split the elevator a bit so that it is possible to choose a different
+ * one or even write a new "plug in". There are three pieces:
+ * - elevator_fn, inserts a new request in the queue list
+ * - elevator_merge_fn, decides whether a new buffer can be merged with
+ * an existing request
+ * - elevator_dequeue_fn, called when a request is taken off the active list
+ *
+ * 20082000 Dave Jones <davej@suse.de> :
+ * Removed tests for max-bomb-segments, which was breaking elvtune
+ * when run without -bN
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+/*#include <xeno/fs.h>*/
+#include <xeno/blkdev.h>
+#include <xeno/elevator.h>
+#include <xeno/blk.h>
+#include <xeno/module.h>
+#include <asm/uaccess.h>
+
+/*
+ * This is a bit tricky. It's given that bh and rq are for the same
+ * device, but the next request might of course not be. Run through
+ * the tests below to check if we want to insert here if we can't merge
+ * bh into an existing request
+ */
+inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq,
+ struct list_head *head)
+{
+ struct list_head *next;
+ struct request *next_rq;
+
+ next = rq->queue.next;
+ if (next == head)
+ return 0;
+
+ /*
+ * if the device is different (usually on a different partition),
+ * just check if bh is after rq
+ */
+ next_rq = blkdev_entry_to_request(next);
+ if (next_rq->rq_dev != rq->rq_dev)
+ return bh->b_rsector > rq->sector;
+
+ /*
+ * ok, rq, next_rq and bh are on the same device. if bh is in between
+ * the two, this is the sweet spot
+ */
+ if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector)
+ return 1;
+
+ /*
+ * next_rq is ordered wrt rq, but bh is not in between the two
+ */
+ if (next_rq->sector > rq->sector)
+ return 0;
+
+ /*
+ * next_rq and rq not ordered, if we happen to be either before
+ * next_rq or after rq insert here anyway
+ */
+ if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector)
+ return 1;
+
+ return 0;
+}
+
+
+int elevator_linus_merge(request_queue_t *q, struct request **req,
+ struct list_head * head,
+ struct buffer_head *bh, int rw,
+ int max_sectors)
+{
+ struct list_head *entry = &q->queue_head;
+ unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE;
+
+ while ((entry = entry->prev) != head) {
+ struct request *__rq = blkdev_entry_to_request(entry);
+
+ /*
+ * simply "aging" of requests in queue
+ */
+ if (__rq->elevator_sequence-- <= 0)
+ break;
+
+ if (__rq->waiting)
+ continue;
+ if (__rq->rq_dev != bh->b_rdev)
+ continue;
+ if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head))
+ *req = __rq;
+ if (__rq->cmd != rw)
+ continue;
+ if (__rq->nr_sectors + count > max_sectors)
+ continue;
+ if (__rq->elevator_sequence < count)
+ break;
+ if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
+ ret = ELEVATOR_BACK_MERGE;
+ *req = __rq;
+ break;
+ } else if (__rq->sector - count == bh->b_rsector) {
+ ret = ELEVATOR_FRONT_MERGE;
+ __rq->elevator_sequence -= count;
+ *req = __rq;
+ break;
+ }
+ }
+
+ return ret;
+}
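+
+/*
+ * Worked example (illustrative): given a queued request covering
+ * sectors [100, 108) on the same device, a 1KB buffer_head (count == 2)
+ * at b_rsector == 108 yields ELEVATOR_BACK_MERGE (sector + nr_sectors
+ * matches), one at b_rsector == 98 yields ELEVATOR_FRONT_MERGE
+ * (sector - count matches), and anything else at best picks an
+ * insertion point via bh_rq_in_between() and returns ELEVATOR_NO_MERGE.
+ */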
+
+void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count)
+{
+ struct list_head *entry = &req->queue, *head = &q->queue_head;
+
+ /*
+ * second pass scan of requests that got passed over, if any
+ */
+ while ((entry = entry->next) != head) {
+ struct request *tmp = blkdev_entry_to_request(entry);
+ tmp->elevator_sequence -= count;
+ }
+}
+
+void elevator_linus_merge_req(struct request *req, struct request *next)
+{
+ if (next->elevator_sequence < req->elevator_sequence)
+ req->elevator_sequence = next->elevator_sequence;
+}
+
+/*
+ * See if we can find a request that this buffer can be coalesced with.
+ */
+int elevator_noop_merge(request_queue_t *q, struct request **req,
+ struct list_head * head,
+ struct buffer_head *bh, int rw,
+ int max_sectors)
+{
+ struct list_head *entry;
+ unsigned int count = bh->b_size >> 9;
+
+ if (list_empty(&q->queue_head))
+ return ELEVATOR_NO_MERGE;
+
+ entry = &q->queue_head;
+ while ((entry = entry->prev) != head) {
+ struct request *__rq = blkdev_entry_to_request(entry);
+
+ if (__rq->cmd != rw)
+ continue;
+ if (__rq->rq_dev != bh->b_rdev)
+ continue;
+ if (__rq->nr_sectors + count > max_sectors)
+ continue;
+ if (__rq->waiting)
+ continue;
+ if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
+ *req = __rq;
+ return ELEVATOR_BACK_MERGE;
+ } else if (__rq->sector - count == bh->b_rsector) {
+ *req = __rq;
+ return ELEVATOR_FRONT_MERGE;
+ }
+ }
+
+ *req = blkdev_entry_to_request(q->queue_head.prev);
+ return ELEVATOR_NO_MERGE;
+}
+
+void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {}
+
+void elevator_noop_merge_req(struct request *req, struct request *next) {}
+
+int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg)
+{
+ blkelv_ioctl_arg_t output;
+
+ output.queue_ID = elevator->queue_ID;
+ output.read_latency = elevator->read_latency;
+ output.write_latency = elevator->write_latency;
+ output.max_bomb_segments = 0;
+
+ if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t)))
+ return -EFAULT;
+
+ return 0;
+}
+
+int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg)
+{
+ blkelv_ioctl_arg_t input;
+
+ if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t)))
+ return -EFAULT;
+
+ if (input.read_latency < 0)
+ return -EINVAL;
+ if (input.write_latency < 0)
+ return -EINVAL;
+
+ elevator->read_latency = input.read_latency;
+ elevator->write_latency = input.write_latency;
+ return 0;
+}
+
+void elevator_init(elevator_t * elevator, elevator_t type)
+{
+ static unsigned int queue_ID;
+
+ *elevator = type;
+ elevator->queue_ID = queue_ID++;
+}
diff --git a/xen/drivers/block/genhd.c b/xen/drivers/block/genhd.c
new file mode 100644
index 0000000000..427c2cb312
--- /dev/null
+++ b/xen/drivers/block/genhd.c
@@ -0,0 +1,219 @@
+/*
+ * Code extracted from
+ * linux/kernel/hd.c
+ *
+ * Copyright (C) 1991-1998 Linus Torvalds
+ *
+ * devfs support - jj, rgooch, 980122
+ *
+ * Moved partition checking code to fs/partitions* - Russell King
+ * (linux@arm.uk.linux.org)
+ */
+
+/*
+ * TODO: rip out the remaining init crap from this file --hch
+ */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+/*#include <xeno/fs.h>*/
+#include <xeno/genhd.h>
+#include <xeno/lib.h>
+#include <xeno/blk.h>
+#include <xeno/init.h>
+#include <xeno/spinlock.h>
+
+
+static rwlock_t gendisk_lock;
+
+/*
+ * Global kernel list of partitioning information.
+ *
+ * XXX: you should _never_ access this directly.
+ * the only reason this is exported is source compatibility.
+ */
+/*static*/ struct gendisk *gendisk_head;
+static struct gendisk *gendisk_array[MAX_BLKDEV];
+
+EXPORT_SYMBOL(gendisk_head);
+
+
+/**
+ * add_gendisk - add partitioning information to kernel list
+ * @gp: per-device partitioning information
+ *
+ * This function registers the partitioning information in @gp
+ * with the kernel.
+ */
+void
+add_gendisk(struct gendisk *gp)
+{
+ struct gendisk *sgp;
+
+ write_lock(&gendisk_lock);
+
+ /*
+ * In 2.5 this will go away. Fix the drivers who rely on
+ * old behaviour.
+ */
+
+ for (sgp = gendisk_head; sgp; sgp = sgp->next)
+ {
+ if (sgp == gp)
+ {
+// printk(KERN_ERR "add_gendisk: device major %d is buggy and added a live gendisk!\n",
+// sgp->major)
+ goto out;
+ }
+ }
+ gendisk_array[gp->major] = gp;
+ gp->next = gendisk_head;
+ gendisk_head = gp;
+out:
+ write_unlock(&gendisk_lock);
+}
+
+EXPORT_SYMBOL(add_gendisk);
+
+
+/**
+ * del_gendisk - remove partitioning information from kernel list
+ * @gp: per-device partitioning information
+ *
+ * This function unregisters the partitioning information in @gp
+ * with the kernel.
+ */
+void
+del_gendisk(struct gendisk *gp)
+{
+ struct gendisk **gpp;
+
+ write_lock(&gendisk_lock);
+ gendisk_array[gp->major] = NULL;
+ for (gpp = &gendisk_head; *gpp; gpp = &((*gpp)->next))
+ if (*gpp == gp)
+ break;
+ if (*gpp)
+ *gpp = (*gpp)->next;
+ write_unlock(&gendisk_lock);
+}
+
+EXPORT_SYMBOL(del_gendisk);
+
+
+/**
+ * get_gendisk - get partitioning information for a given device
+ * @dev: device to get partitioning information for
+ *
+ * This function gets the structure containing partitioning
+ * information for the given device @dev.
+ */
+struct gendisk *
+get_gendisk(kdev_t dev)
+{
+ struct gendisk *gp = NULL;
+ int maj = MAJOR(dev);
+
+ read_lock(&gendisk_lock);
+ if ((gp = gendisk_array[maj]))
+ goto out;
+
+ /* This is needed for early 2.4 source compatibility. --hch */
+ for (gp = gendisk_head; gp; gp = gp->next)
+ if (gp->major == maj)
+ break;
+out:
+ read_unlock(&gendisk_lock);
+ return gp;
+}
+
+EXPORT_SYMBOL(get_gendisk);
+
+
+/**
+ * walk_gendisk - issue a command for every registered gendisk
+ * @walk: user-specified callback
+ * @data: opaque data for the callback
+ *
+ * This function walks through the gendisk chain and calls back
+ * into @walk for every element.
+ */
+int
+walk_gendisk(int (*walk)(struct gendisk *, void *), void *data)
+{
+ struct gendisk *gp;
+ int error = 0;
+
+ read_lock(&gendisk_lock);
+ for (gp = gendisk_head; gp; gp = gp->next)
+ if ((error = walk(gp, data)))
+ break;
+ read_unlock(&gendisk_lock);
+
+ return error;
+}
+
+
+#ifdef CONFIG_PROC_FS
+int
+get_partition_list(char *page, char **start, off_t offset, int count)
+{
+ struct gendisk *gp;
+ struct hd_struct *hd;
+ char buf[64];
+ int len, n;
+
+ len = sprintf(page, "major minor #blocks name\n\n");
+
+ read_lock(&gendisk_lock);
+ for (gp = gendisk_head; gp; gp = gp->next) {
+ for (n = 0; n < (gp->nr_real << gp->minor_shift); n++) {
+ if (gp->part[n].nr_sects == 0)
+ continue;
+
+ hd = &gp->part[n]; disk_round_stats(hd);
+ len += sprintf(page + len,
+ "%4d %4d %10d %s\n", gp->major,
+ n, gp->sizes[n], disk_name(gp, n, buf));
+
+ if (len < offset)
+ offset -= len, len = 0;
+ else if (len >= offset + count)
+ goto out;
+ }
+ }
+
+out:
+ read_unlock(&gendisk_lock);
+ *start = page + offset;
+ len -= offset;
+ if (len < 0)
+ len = 0;
+ return len > count ? count : len;
+}
+#endif
+
+
+extern int blk_dev_init(void);
+extern int net_dev_init(void);
+extern void console_map_init(void);
+extern int atmdev_init(void);
+
+int __init device_init(void)
+{
+ rwlock_init(&gendisk_lock);
+ blk_dev_init();
+ sti();
+#ifdef CONFIG_NET
+ net_dev_init();
+#endif
+#ifdef CONFIG_ATM
+ (void) atmdev_init();
+#endif
+#ifdef CONFIG_VT
+ console_map_init();
+#endif
+ return 0;
+}
+
+__initcall(device_init);
diff --git a/xen/drivers/block/ll_rw_blk.c b/xen/drivers/block/ll_rw_blk.c
new file mode 100644
index 0000000000..615b332c4b
--- /dev/null
+++ b/xen/drivers/block/ll_rw_blk.c
@@ -0,0 +1,1461 @@
+/*
+ * linux/drivers/block/ll_rw_blk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
+ */
+
+/*
+ * This handles all read/write requests to block devices
+ */
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+/*#include <xeno/kernel_stat.h>*/
+#include <xeno/errno.h>
+/*#include <xeno/locks.h>*/
+#include <xeno/mm.h>
+/*#include <xeno/swap.h>*/
+#include <xeno/init.h>
+/*#include <xeno/smp_lock.h>*/
+/*#include <xeno/completion.h>*/
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <xeno/blk.h>
+/*#include <xeno/highmem.h>*/
+#include <xeno/slab.h>
+#include <xeno/module.h>
+
+/*
+ * KAF: We can turn off noise relating to barking guest-OS requests.
+ */
+#if 0
+#define DPRINTK(_f, _a...) printk(_f , ## _a)
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+/* This will die as all synchronous stuff is coming to an end */
+#define complete(_r) panic("completion.h stuff may be needed...")
+
+/*
+ * MAC Floppy IWM hooks
+ */
+
+#ifdef CONFIG_MAC_FLOPPY_IWM
+extern int mac_floppy_init(void);
+#endif
+
+/*
+ * For the allocated request tables
+ */
+static kmem_cache_t *request_cachep;
+
+/*
+ * The "disk" task queue is used to start the actual requests
+ * after a plug
+ */
+DECLARE_TASK_QUEUE(tq_disk);
+
+/*
+ * Protect the request list against multiple users..
+ *
+ * With this spinlock the Linux block IO subsystem is 100% SMP threaded
+ * from the IRQ event side, and almost 100% SMP threaded from the syscall
+ * side (we still have protect against block device array operations, and
+ * the do_request() side is casually still unsafe. The kernel lock protects
+ * this part currently.).
+ *
+ * there is a fair chance that things will work just OK if these functions
+ * are called with no global kernel lock held ...
+ */
+spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
+
+/* This specifies how many sectors to read ahead on the disk. */
+
+int read_ahead[MAX_BLKDEV];
+
+/* blk_dev_struct is:
+ * *request_fn
+ * *current_request
+ */
+struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
+
+/*
+ * blk_size contains the size of all block-devices in units of 1024 byte
+ * sectors:
+ *
+ * blk_size[MAJOR][MINOR]
+ *
+ * if (!blk_size[MAJOR]) then no minor size checking is done.
+ */
+int * blk_size[MAX_BLKDEV];
+
+/*
+ * blksize_size contains the size of all block-devices:
+ *
+ * blksize_size[MAJOR][MINOR]
+ *
+ * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
+ */
+int * blksize_size[MAX_BLKDEV];
+
+/*
+ * hardsect_size contains the size of the hardware sector of a device.
+ *
+ * hardsect_size[MAJOR][MINOR]
+ *
+ * if (!hardsect_size[MAJOR])
+ * then 512 bytes is assumed.
+ * else
+ * sector_size is hardsect_size[MAJOR][MINOR]
+ * This is currently set by some scsi devices and read by the msdos fs driver.
+ * Other uses may appear later.
+ */
+int * hardsect_size[MAX_BLKDEV];
+
+/*
+ * The following tunes the read-ahead algorithm in mm/filemap.c
+ */
+int * max_readahead[MAX_BLKDEV];
+
+/*
+ * Max number of sectors per request
+ */
+int * max_sectors[MAX_BLKDEV];
+
+static inline int get_max_sectors(kdev_t dev)
+{
+ if (!max_sectors[MAJOR(dev)])
+ return MAX_SECTORS;
+ return max_sectors[MAJOR(dev)][MINOR(dev)];
+}
+
+inline request_queue_t *blk_get_queue(kdev_t dev)
+{
+ struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
+
+ if (bdev->queue)
+ return bdev->queue(dev);
+ else
+ return &blk_dev[MAJOR(dev)].request_queue;
+}
+
+static int __blk_cleanup_queue(struct request_list *list)
+{
+ struct list_head *head = &list->free;
+ struct request *rq;
+ int i = 0;
+
+ while (!list_empty(head)) {
+ rq = list_entry(head->next, struct request, queue);
+ list_del(&rq->queue);
+ kmem_cache_free(request_cachep, rq);
+ i++;
+ };
+
+ if (i != list->count)
+ printk("request list leak!\n");
+
+ list->count = 0;
+ return i;
+}
+
+/**
+ * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
+ * @q: the request queue to be released
+ *
+ * Description:
+ * blk_cleanup_queue is the pair to blk_init_queue(). It should
+ * be called when a request queue is being released; typically
+ * when a block device is being de-registered. Currently, its
+ * primary task is to free all the &struct request structures that
+ * were allocated to the queue.
+ * Caveat:
+ * Hopefully the low level driver will have finished any
+ * outstanding requests first...
+ **/
+void blk_cleanup_queue(request_queue_t * q)
+{
+ int count = q->nr_requests;
+
+ count -= __blk_cleanup_queue(&q->rq[READ]);
+ count -= __blk_cleanup_queue(&q->rq[WRITE]);
+
+ if (count)
+ printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+
+ memset(q, 0, sizeof(*q));
+}
+
+/**
+ * blk_queue_headactive - indicate whether head of request queue may be active
+ * @q: The queue which this applies to.
+ * @active: A flag indicating whether the head of the queue is active.
+ *
+ * Description:
+ * The driver for a block device may choose to leave the currently active
+ * request on the request queue, removing it only when it has completed.
+ * The queue handling routines assume this by default for safety reasons
+ * and will not involve the head of the request queue in any merging or
+ * reordering of requests when the queue is unplugged (and thus may be
+ * working on this particular request).
+ *
+ * If a driver removes requests from the queue before processing them, then
+ * it may indicate that it does so, thereby allowing the head of the queue
+ * to be involved in merging and reordering. This is done by calling
+ * blk_queue_headactive() with an @active flag of %0.
+ *
+ * If a driver processes several requests at once, it must remove them (or
+ * at least all but one of them) from the request queue.
+ *
+ * When a queue is plugged the head will be assumed to be inactive.
+ **/
+
+void blk_queue_headactive(request_queue_t * q, int active)
+{
+ q->head_active = active;
+}
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q: the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ * The normal way for &struct buffer_heads to be passed to a device
+ * driver is for them to be collected into requests on a request
+ * queue, and then to allow the device driver to select requests
+ * off that queue when it is ready. This works well for many block
+ * devices. However some block devices (typically virtual devices
+ * such as md or lvm) do not benefit from the processing on the
+ * request queue, and are served best by having the requests passed
+ * directly to them. This can be achieved by providing a function
+ * to blk_queue_make_request().
+ *
+ * Caveat:
+ * The driver that does this *must* be able to deal appropriately
+ * with buffers in "highmemory", either by calling bh_kmap() to get
+ * a kernel mapping, or by calling create_bounce() to create a
+ * buffer in normal memory.
+ **/
+
+void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
+{
+ q->make_request_fn = mfn;
+}
+
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
+{
+ if (req->nr_segments < max_segments) {
+ req->nr_segments++;
+ return 1;
+ }
+ return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (bh->b_data + bh->b_size == req->bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
+ struct request *next, int max_segments)
+{
+ int total_segments = req->nr_segments + next->nr_segments;
+
+ if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
+ total_segments--;
+
+ if (total_segments > max_segments)
+ return 0;
+
+ req->nr_segments = total_segments;
+ return 1;
+}
+
+/*
+ * "plug" the device if there are no outstanding requests: this will
+ * force the transfer to start only after we have put all the requests
+ * on the list.
+ *
+ * This is called with interrupts off and no requests on the queue.
+ * (and with the request spinlock acquired)
+ */
+static void generic_plug_device(request_queue_t *q, kdev_t dev)
+{
+ /*
+ * no need to replug device
+ */
+ if (!list_empty(&q->queue_head) || q->plugged)
+ return;
+
+ q->plugged = 1;
+ queue_task(&q->plug_tq, &tq_disk);
+}
+
+/*
+ * remove the plug and let it rip..
+ */
+static inline void __generic_unplug_device(request_queue_t *q)
+{
+ if (q->plugged) {
+ q->plugged = 0;
+ if (!list_empty(&q->queue_head))
+ {
+ q->request_fn(q);
+ }
+ }
+}
+
+void generic_unplug_device(void *data)
+{
+ request_queue_t *q = (request_queue_t *) data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ __generic_unplug_device(q);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/**
+ * blk_grow_request_list - grow a queue's pool of free requests
+ * @q: The &request_queue_t
+ * @nr_requests: how many requests are desired
+ *
+ * More free requests are added to the queue's free lists, bringing
+ * the total number of requests to @nr_requests.
+ *
+ * The requests are added equally to the request queue's read
+ * and write freelists.
+ *
+ * This function can sleep.
+ *
+ * Returns the (new) number of requests which the queue has available.
+ */
+int blk_grow_request_list(request_queue_t *q, int nr_requests)
+{
+ unsigned long flags;
+ /* Several broken drivers assume that this function doesn't sleep;
+ * this causes system hangs during boot.
+ * As a temporary fix, make the function non-blocking.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ while (q->nr_requests < nr_requests) {
+ struct request *rq;
+ int rw;
+
+ rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
+ if (rq == NULL)
+ break;
+ memset(rq, 0, sizeof(*rq));
+ rq->rq_status = RQ_INACTIVE;
+ rw = q->nr_requests & 1;
+ list_add(&rq->queue, &q->rq[rw].free);
+ q->rq[rw].count++;
+ q->nr_requests++;
+ }
+ q->batch_requests = q->nr_requests / 4;
+ if (q->batch_requests > 32)
+ q->batch_requests = 32;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return q->nr_requests;
+}
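+
+/*
+ * Note (illustrative): the "q->nr_requests & 1" above deals new requests
+ * alternately onto the READ and WRITE freelists, so growing the pool to
+ * 128 leaves 64 on each; batch_requests then becomes 128/4, capped at 32.
+ */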
+
+static void blk_init_free_list(request_queue_t *q)
+{
+ /*struct sysinfo si;*/
+ /*int megs;*/ /* Total memory, in megabytes */
+ int nr_requests;
+
+ INIT_LIST_HEAD(&q->rq[READ].free);
+ INIT_LIST_HEAD(&q->rq[WRITE].free);
+ q->rq[READ].count = 0;
+ q->rq[WRITE].count = 0;
+ q->nr_requests = 0;
+
+#if 0
+ si_meminfo(&si);
+ megs = si.totalram >> (20 - PAGE_SHIFT);
+ nr_requests = 128;
+ if (megs < 32)
+ nr_requests /= 2;
+#else
+ nr_requests = 128;
+#endif
+ blk_grow_request_list(q, nr_requests);
+
+#if 0
+ init_waitqueue_head(&q->wait_for_requests[0]);
+ init_waitqueue_head(&q->wait_for_requests[1]);
+#endif
+ spin_lock_init(&q->queue_lock);
+}
+
+static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
+
+/**
+ * blk_init_queue - prepare a request queue for use with a block device
+ * @q: The &request_queue_t to be initialised
+ * @rfn: The function to be called to process requests that have been
+ * placed on the queue.
+ *
+ * Description:
+ * If a block device wishes to use the standard request handling procedures,
+ * which sorts requests and coalesces adjacent requests, then it must
+ * call blk_init_queue(). The function @rfn will be called when there
+ * are requests on the queue that need to be processed. If the device
+ * supports plugging, then @rfn may not be called immediately when requests
+ * are available on the queue, but may be called at some time later instead.
+ * Plugged queues are generally unplugged when a buffer belonging to one
+ * of the requests on the queue is needed, or due to memory pressure.
+ *
+ * @rfn is not required, or even expected, to remove all requests off the
+ * queue, but only as many as it can handle at a time. If it does leave
+ * requests on the queue, it is responsible for arranging that the requests
+ * get dealt with eventually.
+ *
+ * A global spin lock $io_request_lock must be held while manipulating the
+ * requests on the request queue.
+ *
+ * The request on the head of the queue is by default assumed to be
+ * potentially active, and it is not considered for re-ordering or merging
+ * whenever the given queue is unplugged. This behaviour can be changed with
+ * blk_queue_headactive().
+ *
+ * Note:
+ * blk_init_queue() must be paired with a blk_cleanup_queue() call
+ * when the block device is deactivated (such as at module unload).
+ **/
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+ INIT_LIST_HEAD(&q->queue_head);
+ elevator_init(&q->elevator, ELEVATOR_LINUS);
+ blk_init_free_list(q);
+ q->request_fn = rfn;
+ q->back_merge_fn = ll_back_merge_fn;
+ q->front_merge_fn = ll_front_merge_fn;
+ q->merge_requests_fn = ll_merge_requests_fn;
+ q->make_request_fn = __make_request;
+ q->plug_tq.sync = 0;
+ q->plug_tq.routine = &generic_unplug_device;
+ q->plug_tq.data = q;
+ q->plugged = 0;
+ /*
+ * These booleans describe the queue properties. We set the
+ * default (and most common) values here. Other drivers can
+ * use the appropriate functions to alter the queue properties
+ * as required.
+ */
+ q->plug_device_fn = generic_plug_device;
+ q->head_active = 1;
+}
+
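+/*
+ * Illustrative sketch only, not part of this file: how a minimal driver
+ * might pair blk_init_queue() with a request function, per the kernel-doc
+ * for blk_init_queue() above. The name mydev_request_fn is hypothetical.
+ */
+#if 0
+static void mydev_request_fn(request_queue_t *q)
+{
+ struct request *req;
+ while (!list_empty(&q->queue_head)) { /* io_request_lock is held */
+ req = blkdev_entry_next_request(&q->queue_head);
+ blkdev_dequeue_request(req); /* claim it before touching hardware */
+ /* ...program the device, then end_that_request_*() on completion... */
+ }
+}
+/* At init: blk_init_queue(q, mydev_request_fn);
+ * at unload: blk_cleanup_queue(q). */
+#endif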
+#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue)
+/*
+ * Get a free request. io_request_lock must be held and interrupts
+ * disabled on the way in. Returns NULL if there are no free requests.
+ */
+static struct request *get_request(request_queue_t *q, int rw)
+{
+ struct request *rq = NULL;
+ struct request_list *rl = q->rq + rw;
+
+ if (!list_empty(&rl->free)) {
+ rq = blkdev_free_rq(&rl->free);
+ list_del(&rq->queue);
+ rl->count--;
+ rq->rq_status = RQ_ACTIVE;
+ rq->cmd = rw;
+ rq->special = NULL;
+ rq->q = q;
+ }
+
+ return rq;
+}
+
+/*
+ * Here's the request allocation design:
+ *
+ * 1: Blocking on request exhaustion is a key part of I/O throttling.
+ *
+ * 2: We want to be `fair' to all requesters. We must avoid starvation, and
+ * attempt to ensure that all requesters sleep for a similar duration. Hence
+ * no stealing requests when there are other processes waiting.
+ *
+ * 3: We also wish to support `batching' of requests. So when a process is
+ * woken, we want to allow it to allocate a decent number of requests
+ * before it blocks again, so they can be nicely merged (this only really
+ * matters if the process happens to be adding requests near the head of
+ * the queue).
+ *
+ * 4: We want to avoid scheduling storms. This isn't really important, because
+ * the system will be I/O bound anyway. But it's easy.
+ *
+ * There is tension between requirements 2 and 3. Once a task has woken,
+ * we don't want to allow it to sleep as soon as it takes its second request.
+ * But we don't want currently-running tasks to steal all the requests
+ * from the sleepers. We handle this with wakeup hysteresis around
+ * 0 .. batch_requests and with the assumption that request taking is much,
+ * much faster than request freeing.
+ *
+ * So here's what we do:
+ *
+ * a) A READA requester fails if free_requests < batch_requests
+ *
+ * We don't want READA requests to prevent sleepers from ever
+ * waking. Note that READA is used extremely rarely - a few
+ * filesystems use it for directory readahead.
+ *
+ * When a process wants a new request:
+ *
+ * b) If free_requests == 0, the requester sleeps in FIFO manner.
+ *
+ * c) If 0 < free_requests < batch_requests and there are waiters,
+ * we still take a request non-blockingly. This provides batching.
+ *
+ * d) If free_requests >= batch_requests, the caller is immediately
+ * granted a new request.
+ *
+ * When a request is released:
+ *
+ * e) If free_requests < batch_requests, do nothing.
+ *
+ * f) If free_requests >= batch_requests, wake up a single waiter.
+ *
+ * The net effect is that when a process is woken at the batch_requests level,
+ * it will be able to take approximately (batch_requests) requests before
+ * blocking again (at the tail of the queue).
+ *
+ * This all assumes that the rate of taking requests is much, much higher
+ * than the rate of releasing them. Which is very true.
+ *
+ * -akpm, Feb 2002.
+ */
+
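+/*
+ * Worked numbers (illustrative): blk_init_free_list() above asks for 128
+ * requests, so batch_requests = min(128/4, 32) = 32. A READA therefore
+ * fails once fewer than 32 requests are free, and a sleeper woken at the
+ * 32-free mark can take roughly 32 requests before blocking again.
+ */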
+static struct request *__get_request_wait(request_queue_t *q, int rw)
+{
+#if 0
+ register struct request *rq;
+ DECLARE_WAITQUEUE(wait, current);
+
+ generic_unplug_device(q);
+ add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
+ do {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (q->rq[rw].count == 0)
+ schedule();
+ spin_lock_irq(&io_request_lock);
+ rq = get_request(q,rw);
+ spin_unlock_irq(&io_request_lock);
+ } while (rq == NULL);
+ remove_wait_queue(&q->wait_for_requests[rw], &wait);
+ current->state = TASK_RUNNING;
+ return rq;
+#else
+ panic("__get_request_wait shouldn't be depended on");
+ return NULL;
+#endif
+}
+
+/* RO fail safe mechanism */
+
+static long ro_bits[MAX_BLKDEV][8];
+
+int is_read_only(kdev_t dev)
+{
+ int minor,major;
+
+ major = MAJOR(dev);
+ minor = MINOR(dev);
+ if (major < 0 || major >= MAX_BLKDEV) return 0;
+ return ro_bits[major][minor >> 5] & (1 << (minor & 31));
+}
+
+void set_device_ro(kdev_t dev,int flag)
+{
+ int minor,major;
+
+ major = MAJOR(dev);
+ minor = MINOR(dev);
+ if (major < 0 || major >= MAX_BLKDEV) return;
+ if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
+ else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
+}
+
+inline void drive_stat_acct (kdev_t dev, int rw,
+ unsigned long nr_sectors, int new_io)
+{
+ /*unsigned int major = MAJOR(dev);*/
+ unsigned int index;
+
+ index = disk_index(dev);
+#if 0
+ if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+ return;
+#endif
+
+#if 0
+ kstat.dk_drive[major][index] += new_io;
+ if (rw == READ) {
+ kstat.dk_drive_rio[major][index] += new_io;
+ kstat.dk_drive_rblk[major][index] += nr_sectors;
+ } else if (rw == WRITE) {
+ kstat.dk_drive_wio[major][index] += new_io;
+ kstat.dk_drive_wblk[major][index] += nr_sectors;
+ } else
+ printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
+#endif
+}
+
+/* Return up to two hd_structs on which to do IO accounting for a given
+ * request. On a partitioned device, we want to account both against
+ * the partition and against the whole disk. */
+static void locate_hd_struct(struct request *req,
+ struct hd_struct **hd1,
+ struct hd_struct **hd2)
+{
+ struct gendisk *gd;
+
+ *hd1 = NULL;
+ *hd2 = NULL;
+
+ gd = get_gendisk(req->rq_dev);
+ if (gd && gd->part) {
+ /* Mask out the partition bits: account for the entire disk */
+ int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
+ int whole_minor = devnr << gd->minor_shift;
+ *hd1 = &gd->part[whole_minor];
+ if (whole_minor != MINOR(req->rq_dev))
+ *hd2= &gd->part[MINOR(req->rq_dev)];
+ }
+}
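+/*
+ * Example (illustrative, assuming a typical IDE gendisk with
+ * minor_shift = 6): a request to minor 1 (first partition) yields
+ * hd1 = the whole-disk entry (minor 0) and hd2 = the partition entry,
+ * so both rows of the statistics get updated.
+ */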
+
+/* Round off the performance stats on an hd_struct. The average IO
+ * queue length and utilisation statistics are maintained by observing
+ * the current state of the queue length and the amount of time it has
+ * been in this state for. Normally, that accounting is done on IO
+ * completion, but that can result in more than a second's worth of IO
+ * being accounted for within any one second, leading to >100%
+ * utilisation. To deal with that, we do a round-off before returning
+ * the results when reading /proc/partitions, accounting immediately for
+ * all queue usage up to the current jiffies and restarting the counters
+ * again. */
+void disk_round_stats(struct hd_struct *hd)
+{
+ unsigned long now = jiffies;
+
+ hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
+ hd->last_queue_change = now;
+
+ if (hd->ios_in_flight)
+ hd->io_ticks += (now - hd->last_idle_time);
+ hd->last_idle_time = now;
+}
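+/*
+ * Illustrative reading of the counters above (an assumption about how a
+ * consumer interprets them): over an interval of T jiffies, io_ticks/T
+ * is the device utilisation and aveq/T the mean I/O queue depth.
+ */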
+
+
+static inline void down_ios(struct hd_struct *hd)
+{
+ disk_round_stats(hd);
+ --hd->ios_in_flight;
+}
+
+static inline void up_ios(struct hd_struct *hd)
+{
+ disk_round_stats(hd);
+ ++hd->ios_in_flight;
+}
+
+static void account_io_start(struct hd_struct *hd, struct request *req,
+ int merge, int sectors)
+{
+ switch (req->cmd) {
+ case READ:
+ if (merge)
+ hd->rd_merges++;
+ hd->rd_sectors += sectors;
+ break;
+ case WRITE:
+ if (merge)
+ hd->wr_merges++;
+ hd->wr_sectors += sectors;
+ break;
+ }
+ if (!merge)
+ up_ios(hd);
+}
+
+static void account_io_end(struct hd_struct *hd, struct request *req)
+{
+ unsigned long duration = jiffies - req->start_time;
+ switch (req->cmd) {
+ case READ:
+ hd->rd_ticks += duration;
+ hd->rd_ios++;
+ break;
+ case WRITE:
+ hd->wr_ticks += duration;
+ hd->wr_ios++;
+ break;
+ }
+ down_ios(hd);
+}
+
+void req_new_io(struct request *req, int merge, int sectors)
+{
+ struct hd_struct *hd1, *hd2;
+ locate_hd_struct(req, &hd1, &hd2);
+ if (hd1)
+ account_io_start(hd1, req, merge, sectors);
+ if (hd2)
+ account_io_start(hd2, req, merge, sectors);
+}
+
+void req_finished_io(struct request *req)
+{
+ struct hd_struct *hd1, *hd2;
+ locate_hd_struct(req, &hd1, &hd2);
+ if (hd1)
+ account_io_end(hd1, req);
+ if (hd2)
+ account_io_end(hd2, req);
+}
+
+/*
+ * add-request adds a request to the linked list.
+ * io_request_lock is held and interrupts disabled, as we muck with the
+ * request queue list.
+ *
+ * By this point, req->cmd is always either READ/WRITE, never READA,
+ * which is important for drive_stat_acct() above.
+ */
+static inline void add_request(request_queue_t * q, struct request * req,
+ struct list_head *insert_here)
+{
+ drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
+
+ if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+ spin_unlock_irq(&io_request_lock);
+ BUG();
+ }
+
+ /*
+ * elevator indicated where it wants this request to be
+ * inserted at elevator_merge time
+ */
+ list_add(&req->queue, insert_here);
+}
+
+/*
+ * Must be called with io_request_lock held and interrupts disabled
+ */
+void blkdev_release_request(struct request *req)
+{
+ request_queue_t *q = req->q;
+ int rw = req->cmd;
+
+ req->rq_status = RQ_INACTIVE;
+ req->q = NULL;
+
+ /*
+ * Request may not have originated from ll_rw_blk. if not,
+ * assume it has free buffers and check waiters
+ */
+ if (q) {
+ list_add(&req->queue, &q->rq[rw].free);
+#if 0
+ if (++q->rq[rw].count >= q->batch_requests &&
+ waitqueue_active(&q->wait_for_requests[rw]))
+ wake_up(&q->wait_for_requests[rw]);
+#endif
+ }
+}
+
+/*
+ * Has to be called with the request spinlock acquired
+ */
+static void attempt_merge(request_queue_t * q,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ struct request *next;
+ struct hd_struct *hd1, *hd2;
+
+ next = blkdev_next_request(req);
+ if (req->sector + req->nr_sectors != next->sector)
+ return;
+ if (req->cmd != next->cmd
+ || req->rq_dev != next->rq_dev
+ || req->nr_sectors + next->nr_sectors > max_sectors
+ || next->waiting)
+ return;
+ /*
+ * If we are not allowed to merge these requests, then
+ * return. If we are allowed to merge, then the count
+ * will have been updated to the appropriate number,
+ * and we shouldn't do it here too.
+ */
+ if (!q->merge_requests_fn(q, req, next, max_segments))
+ return;
+
+ q->elevator.elevator_merge_req_fn(req, next);
+ req->bhtail->b_reqnext = next->bh;
+ req->bhtail = next->bhtail;
+ req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+ list_del(&next->queue);
+
+ /* One last thing: we have removed a request, so we now have one
+ less expected IO to complete for accounting purposes. */
+
+ locate_hd_struct(req, &hd1, &hd2);
+ if (hd1)
+ down_ios(hd1);
+ if (hd2)
+ down_ios(hd2);
+ blkdev_release_request(next);
+}
+
+static inline void attempt_back_merge(request_queue_t * q,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ if (&req->queue == q->queue_head.prev)
+ return;
+ attempt_merge(q, req, max_sectors, max_segments);
+}
+
+static inline void attempt_front_merge(request_queue_t * q,
+ struct list_head * head,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ struct list_head * prev;
+
+ prev = req->queue.prev;
+ if (head == prev)
+ return;
+ attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
+}
+
+static int __make_request(request_queue_t * q, int rw,
+ struct buffer_head * bh)
+{
+ unsigned int sector, count;
+ int max_segments = MAX_SEGMENTS;
+ struct request * req, *freereq = NULL;
+ int rw_ahead, max_sectors, el_ret;
+ struct list_head *head, *insert_here;
+ int latency;
+ elevator_t *elevator = &q->elevator;
+
+ count = bh->b_size >> 9;
+ sector = bh->b_rsector;
+
+ rw_ahead = 0; /* normal case; gets changed below for READA */
+ switch (rw) {
+ case READA:
+#if 0 /* bread() misinterprets failed READA attempts as IO errors on SMP */
+ rw_ahead = 1;
+#endif
+ rw = READ; /* drop into READ */
+ case READ:
+ case WRITE:
+ latency = elevator_request_latency(elevator, rw);
+ break;
+ default:
+ BUG();
+ goto end_io;
+ }
+
+ /* We'd better have a real physical mapping!
+ Check this bit only if the buffer was dirty and just locked
+ down by us so at this point flushpage will block and
+ won't clear the mapped bit under us. */
+ if (!buffer_mapped(bh))
+ BUG();
+
+ /*
+ * Temporary solution - in 2.5 this will be done by the lowlevel
+ * driver. Create a bounce buffer if the buffer data points into
+ * high memory - keep the original buffer otherwise.
+ */
+#if CONFIG_HIGHMEM
+ bh = create_bounce(rw, bh);
+#endif
+
+/* look for a free request. */
+ /*
+ * Try to coalesce the new request with old requests
+ */
+ max_sectors = get_max_sectors(bh->b_rdev);
+
+again:
+ req = NULL;
+ head = &q->queue_head;
+ /*
+ * Now we acquire the request spinlock, we have to be mega careful
+ * not to schedule or do something nonatomic
+ */
+ spin_lock_irq(&io_request_lock);
+
+ insert_here = head->prev;
+ if (list_empty(head)) {
+ q->plug_device_fn(q, bh->b_rdev); /* is atomic */
+ goto get_rq;
+ } else if (q->head_active && !q->plugged)
+ head = head->next;
+
+ el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
+ switch (el_ret) {
+
+ case ELEVATOR_BACK_MERGE:
+ if (!q->back_merge_fn(q, req, bh, max_segments)) {
+ insert_here = &req->queue;
+ break;
+ }
+ elevator->elevator_merge_cleanup_fn(q, req, count);
+ req->bhtail->b_reqnext = bh;
+ req->bhtail = bh;
+ req->nr_sectors = req->hard_nr_sectors += count;
+ blk_started_io(count);
+ drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+ req_new_io(req, 1, count);
+ attempt_back_merge(q, req, max_sectors, max_segments);
+ goto out;
+
+ case ELEVATOR_FRONT_MERGE:
+ if (!q->front_merge_fn(q, req, bh, max_segments)) {
+ insert_here = req->queue.prev;
+ break;
+ }
+ elevator->elevator_merge_cleanup_fn(q, req, count);
+ bh->b_reqnext = req->bh;
+ req->bh = bh;
+ req->buffer = bh->b_data;
+ req->current_nr_sectors = count;
+ req->sector = req->hard_sector = sector;
+ req->nr_sectors = req->hard_nr_sectors += count;
+ blk_started_io(count);
+ drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+ req_new_io(req, 1, count);
+ attempt_front_merge(q, head, req, max_sectors, max_segments);
+ goto out;
+
+ /*
+ * elevator says don't/can't merge. get new request
+ */
+ case ELEVATOR_NO_MERGE:
+ /*
+ * use elevator hints as to where to insert the
+ * request. if no hints, just add it to the back
+ * of the queue
+ */
+ if (req)
+ insert_here = &req->queue;
+ break;
+
+ default:
+ printk("elevator returned crap (%d)\n", el_ret);
+ BUG();
+ }
+
+get_rq:
+ if (freereq) {
+ req = freereq;
+ freereq = NULL;
+ } else {
+ /*
+ * See description above __get_request_wait()
+ */
+ if (rw_ahead) {
+ if (q->rq[rw].count < q->batch_requests) {
+ spin_unlock_irq(&io_request_lock);
+ goto end_io;
+ }
+ req = get_request(q, rw);
+ if (req == NULL)
+ BUG();
+ } else {
+ req = get_request(q, rw);
+ if (req == NULL) {
+ spin_unlock_irq(&io_request_lock);
+ freereq = __get_request_wait(q, rw);
+ goto again;
+ }
+ }
+ }
+
+/* fill up the request-info, and add it to the queue */
+ req->elevator_sequence = latency;
+ req->cmd = rw;
+ req->errors = 0;
+ req->hard_sector = req->sector = sector;
+ req->hard_nr_sectors = req->nr_sectors = count;
+ req->current_nr_sectors = count;
+ req->nr_segments = 1; /* Always 1 for a new request. */
+ req->nr_hw_segments = 1; /* Always 1 for a new request. */
+ req->buffer = bh->b_data;
+ req->waiting = NULL;
+ req->bh = bh;
+ req->bhtail = bh;
+ req->rq_dev = bh->b_rdev;
+ req->start_time = jiffies;
+ req_new_io(req, 0, count);
+ blk_started_io(count);
+ add_request(q, req, insert_here);
+out:
+ if (freereq)
+ blkdev_release_request(freereq);
+ spin_unlock_irq(&io_request_lock);
+ return 0;
+end_io:
+ bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+ return 0;
+}
+
+/**
+ * generic_make_request: hand a buffer head to its device driver for I/O
+ * @rw: READ, WRITE, or READA - what sort of I/O is desired.
+ * @bh: The buffer head describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct buffer_head and a &rw value. The
+ * %READ and %WRITE options are (hopefully) obvious in meaning. The
+ * %READA value means that a read is required, but that the driver is
+ * free to fail the request if, for example, it cannot get needed
+ * resources immediately.
+ *
+ * generic_make_request() does not return any status. The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bh->b_end_io
+ * function described (one day) elsewhere.
+ *
+ * The caller of generic_make_request must make sure that b_page,
+ * b_addr, b_size are set to describe the memory buffer, that b_rdev
+ * and b_rsector are set to describe the device address, and the
+ * b_end_io and optionally b_private are set to describe how
+ * completion notification should be signaled. BH_Mapped should also
+ * be set (to confirm that b_dev and b_blocknr are valid).
+ *
+ * generic_make_request and the drivers it calls may use b_reqnext,
+ * and may change b_rdev and b_rsector. So the values of these fields
+ * should NOT be depended on after the call to generic_make_request.
+ * Because of this, the caller should record the device address
+ * information in b_dev and b_blocknr.
+ *
+ * Apart from those fields mentioned above, no other fields, and in
+ * particular, no other flags, are changed by generic_make_request or
+ * any lower level drivers.
+ * */
+void generic_make_request (int rw, struct buffer_head * bh)
+{
+ int major = MAJOR(bh->b_rdev);
+ int minorsize = 0;
+ request_queue_t *q;
+
+ if (!bh->b_end_io)
+ BUG();
+
+ /* Test device size, when known. */
+ if (blk_size[major])
+ minorsize = blk_size[major][MINOR(bh->b_rdev)];
+ if (minorsize) {
+ unsigned long maxsector = (minorsize << 1) + 1;
+ unsigned long sector = bh->b_rsector;
+ unsigned int count = bh->b_size >> 9;
+
+ if (maxsector < count || maxsector - count < sector) {
+ /* Yecch */
+ bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
+
+ /* This may well happen - the kernel calls bread()
+ without checking the size of the device, e.g.,
+ when mounting a device. */
+ DPRINTK(KERN_INFO
+ "attempt to access beyond end of device\n");
+ DPRINTK(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
+ kdevname(bh->b_rdev), rw,
+ (sector + count)>>1, minorsize);
+
+ /* Yecch again */
+ bh->b_end_io(bh, 0);
+ return;
+ }
+ }
+
+ /*
+ * Resolve the mapping until finished. (drivers are
+ * still free to implement/resolve their own stacking
+ * by explicitly returning 0)
+ */
+ /* NOTE: we don't repeat the blk_size check for each new device.
+ * Stacking drivers are expected to know what they are doing.
+ */
+ do {
+ q = blk_get_queue(bh->b_rdev);
+ if (!q || !q->make_request_fn) {
+ DPRINTK(KERN_ERR
+ "generic_make_request: Trying to access "
+ "nonexistent block-device %s (%ld)\n",
+ kdevname(bh->b_rdev), bh->b_rsector);
+ buffer_IO_error(bh);
+ break;
+ }
+ } while (q->make_request_fn(q, rw, bh));
+}
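+/*
+ * Illustrative caller sketch (hypothetical variable names), showing the
+ * fields the kernel-doc above requires before generic_make_request():
+ */
+#if 0
+ bh->b_dev = bh->b_rdev = dev; /* device address */
+ bh->b_blocknr = block; /* caller's record of the block */
+ bh->b_rsector = sector; /* sector address on the device */
+ bh->b_size = 512; /* one sector's worth of data */
+ bh->b_data = buffer; /* memory buffer */
+ bh->b_end_io = my_end_io; /* hypothetical completion handler */
+ set_bit(BH_Mapped, &bh->b_state); /* b_dev/b_blocknr are valid */
+ generic_make_request(READ, bh);
+#endif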
+
+
+/**
+ * submit_bh: submit a buffer_head to the block device later for I/O
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bh: The &struct buffer_head which describes the I/O
+ *
+ * submit_bh() is very similar in purpose to generic_make_request(), and
+ * uses that function to do most of the work.
+ *
+ * The extra functionality provided by submit_bh is to determine
+ * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
+ * This is appropriate for IO requests that come from the buffer
+ * cache and page cache which (currently) always use aligned blocks.
+ */
+void submit_bh(int rw, struct buffer_head * bh)
+{
+ if (!test_bit(BH_Lock, &bh->b_state))
+ BUG();
+
+ set_bit(BH_Req, &bh->b_state);
+ set_bit(BH_Launder, &bh->b_state);
+
+ /*
+ * First step, 'identity mapping' - RAID or LVM might
+ * further remap this.
+ */
+ bh->b_rdev = bh->b_dev;
+ /* bh->b_rsector = bh->b_blocknr * count; */
+
+ generic_make_request(rw, bh);
+}
+
+/**
+ * ll_rw_block: low-level access to block devices
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @nr: number of &struct buffer_heads in the array
+ * @bhs: array of pointers to &struct buffer_head
+ *
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads,
+ * and requests an I/O operation on them, either a %READ or a %WRITE.
+ * The third %READA option is described in the documentation for
+ * generic_make_request() which ll_rw_block() calls.
+ *
+ * This function provides extra functionality that is not in
+ * generic_make_request() that is relevant to buffers in the buffer
+ * cache or page cache. In particular it drops any buffer that it
+ * cannot get a lock on (with the BH_Lock state bit), any buffer that
+ * appears to be clean when doing a write request, and any buffer that
+ * appears to be up-to-date when doing a read request. Further, it marks
+ * as clean buffers that are processed for writing (the buffer cache
+ * won't assume that they are actually clean until the buffer gets
+ * unlocked).
+ *
+ * ll_rw_block sets b_end_io to a simple completion handler that marks
+ * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
+ * any waiters. A client that needs a more interesting completion
+ * routine should call submit_bh() (or generic_make_request())
+ * directly.
+ *
+ * Caveat:
+ * All of the buffers must be for the same device, and must also be
+ * of the current approved size for the device. */
+
+void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
+{
+ unsigned int major;
+ int correct_size;
+ int i;
+
+ if (!nr)
+ return;
+
+ major = MAJOR(bhs[0]->b_dev);
+
+ /* Determine correct block size for this device. */
+ correct_size = get_hardsect_size(bhs[0]->b_dev);
+
+ /* Verify requested block sizes. */
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+ if (bh->b_size % correct_size) {
+ DPRINTK(KERN_NOTICE "ll_rw_block: device %s: "
+ "only %d-char blocks implemented (%u)\n",
+ kdevname(bhs[0]->b_dev),
+ correct_size, bh->b_size);
+ goto sorry;
+ }
+ }
+
+ if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
+ DPRINTK(KERN_NOTICE "Can't write to read-only device %s\n",
+ kdevname(bhs[0]->b_dev));
+ goto sorry;
+ }
+
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+
+ /* Only one thread can actually submit the I/O. */
+ if (test_and_set_bit(BH_Lock, &bh->b_state))
+ continue;
+
+ /* We have the buffer lock */
+ atomic_inc(&bh->b_count);
+
+ switch(rw) {
+ case WRITE:
+ if (!atomic_set_buffer_clean(bh))
+ /* Hmmph! Nothing to write */
+ goto end_io;
+ /* __mark_buffer_clean(bh); */
+ break;
+
+ case READA:
+ case READ:
+ if (buffer_uptodate(bh))
+ /* Hmmph! Already have it */
+ goto end_io;
+ break;
+ default:
+ BUG();
+ end_io:
+ bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+ continue;
+ }
+
+ submit_bh(rw, bh);
+ }
+ return;
+
+sorry:
+ /* Make sure we don't get infinite dirty retries.. */
+ for (i = 0; i < nr; i++)
+ mark_buffer_clean(bhs[i]);
+}
+
+#ifdef CONFIG_STRAM_SWAP
+extern int stram_device_init (void);
+#endif
+
+
+/**
+ * end_that_request_first - end I/O on one buffer.
+ * @req: the request being processed
+ * @uptodate: 0 for I/O error
+ * @name: the name printed for an I/O error
+ *
+ * Description:
+ * Ends I/O on the first buffer attached to @req, and sets it up
+ * for the next buffer_head (if any) in the cluster.
+ *
+ * Return:
+ * 0 - we are done with this request, call end_that_request_last()
+ * 1 - still buffers pending for this request
+ *
+ * Caveat:
+ * Drivers implementing their own end_request handling must call
+ * blk_finished_io() appropriately.
+ **/
+
+int end_that_request_first (struct request *req, int uptodate, char *name)
+{
+ struct buffer_head * bh;
+ int nsect;
+
+ req->errors = 0;
+ if (!uptodate)
+ printk("end_request: I/O error, dev %s (%s), sector %lu\n",
+ kdevname(req->rq_dev), name, req->sector);
+
+ if ((bh = req->bh) != NULL) {
+ nsect = bh->b_size >> 9;
+ blk_finished_io(nsect);
+ req->bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ bh->b_end_io(bh, uptodate);
+ if ((bh = req->bh) != NULL) {
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector = req->hard_sector;
+ req->nr_sectors = req->hard_nr_sectors;
+
+ req->current_nr_sectors = bh->b_size >> 9;
+ if (req->nr_sectors < req->current_nr_sectors) {
+ req->nr_sectors = req->current_nr_sectors;
+ printk("end_request: buffer-list destroyed\n");
+ }
+ req->buffer = bh->b_data;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+void end_that_request_last(struct request *req)
+{
+ if (req->waiting != NULL)
+ complete(req->waiting);
+ req_finished_io(req);
+
+ blkdev_release_request(req);
+}
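+/*
+ * Illustrative driver-side completion sketch (not from this file): the
+ * usual pairing of the two calls above, with io_request_lock held.
+ */
+#if 0
+ while (end_that_request_first(req, uptodate, "mydev"))
+ ; /* more buffer_heads still attached to req */
+ blkdev_dequeue_request(req);
+ end_that_request_last(req);
+#endif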
+
+int __init blk_dev_init(void)
+{
+ struct blk_dev_struct *dev;
+
+ request_cachep = kmem_cache_create("blkdev_requests",
+ sizeof(struct request),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ if (!request_cachep)
+ panic("Can't create request pool slab cache\n");
+
+ for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
+ dev->queue = NULL;
+
+ memset(ro_bits,0,sizeof(ro_bits));
+ memset(max_readahead, 0, sizeof(max_readahead));
+ memset(max_sectors, 0, sizeof(max_sectors));
+
+#ifdef CONFIG_AMIGA_Z2RAM
+ z2_init();
+#endif
+#ifdef CONFIG_STRAM_SWAP
+ stram_device_init();
+#endif
+#ifdef CONFIG_ISP16_CDI
+ isp16_init();
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE)
+ ide_init(); /* this MUST precede hd_init */
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD)
+ hd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_PS2
+ ps2esdi_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XD
+ xd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_MFM
+ mfm_init();
+#endif
+#ifdef CONFIG_PARIDE
+ { extern void paride_init(void); paride_init(); };
+#endif
+#ifdef CONFIG_MAC_FLOPPY
+ swim3_init();
+#endif
+#ifdef CONFIG_BLK_DEV_SWIM_IOP
+ swimiop_init();
+#endif
+#ifdef CONFIG_AMIGA_FLOPPY
+ amiga_floppy_init();
+#endif
+#ifdef CONFIG_ATARI_FLOPPY
+ atari_floppy_init();
+#endif
+#ifdef CONFIG_BLK_DEV_FD
+ floppy_init();
+#else
+#if defined(__i386__) /* Do we even need this? */
+ outb_p(0xc, 0x3f2);
+#endif
+#endif
+#ifdef CONFIG_CDU31A
+ cdu31a_init();
+#endif
+#ifdef CONFIG_ATARI_ACSI
+ acsi_init();
+#endif
+#ifdef CONFIG_MCD
+ mcd_init();
+#endif
+#ifdef CONFIG_MCDX
+ mcdx_init();
+#endif
+#ifdef CONFIG_SBPCD
+ sbpcd_init();
+#endif
+#ifdef CONFIG_AZTCD
+ aztcd_init();
+#endif
+#ifdef CONFIG_CDU535
+ sony535_init();
+#endif
+#ifdef CONFIG_GSCD
+ gscd_init();
+#endif
+#ifdef CONFIG_CM206
+ cm206_init();
+#endif
+#ifdef CONFIG_OPTCD
+ optcd_init();
+#endif
+#ifdef CONFIG_SJCD
+ sjcd_init();
+#endif
+#ifdef CONFIG_APBLOCK
+ ap_init();
+#endif
+#ifdef CONFIG_DDV
+ ddv_init();
+#endif
+#ifdef CONFIG_MDISK
+ mdisk_init();
+#endif
+#ifdef CONFIG_DASD
+ dasd_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
+ tapeblock_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XPRAM
+ xpram_init();
+#endif
+
+#ifdef CONFIG_SUN_JSFLASH
+ jsfd_init();
+#endif
+ return 0;
+}
+
+EXPORT_SYMBOL(io_request_lock);
+EXPORT_SYMBOL(end_that_request_first);
+EXPORT_SYMBOL(end_that_request_last);
+EXPORT_SYMBOL(blk_grow_request_list);
+EXPORT_SYMBOL(blk_init_queue);
+EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(blk_cleanup_queue);
+EXPORT_SYMBOL(blk_queue_headactive);
+EXPORT_SYMBOL(blk_queue_make_request);
+EXPORT_SYMBOL(generic_make_request);
+EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(req_finished_io);
+EXPORT_SYMBOL(generic_unplug_device);
diff --git a/xen/drivers/block/xen_block.c b/xen/drivers/block/xen_block.c
new file mode 100644
index 0000000000..805fd9e1ae
--- /dev/null
+++ b/xen/drivers/block/xen_block.c
@@ -0,0 +1,402 @@
+/*
+ * xen_block.c
+ *
+ * Process incoming block I/O requests from guest OSes.
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+#include <xeno/blkdev.h>
+#include <xeno/event.h>
+#include <hypervisor-ifs/block.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <asm-i386/io.h>
+#include <asm/spinlock.h>
+#include <xeno/keyhandler.h>
+#include <xeno/interrupt.h>
+
+#if 0
+#define DPRINTK(_f, _a...) printk( _f , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent
+ * requests pulled from a communication ring are quite likely to end
+ * up being part of the same scatter/gather request at the disc.
+ * It might be a good idea to add scatter/gather support explicitly to
+ * the scatter/gather ring (e.g. each request has an array of N pointers);
+ * then these values would better reflect real costs at the disc.
+ */
+#define MAX_PENDING_REQS 32
+#define BATCH_PER_DOMAIN 8
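+/*
+ * Illustrative arithmetic: with the values above, io_schedule() admits up
+ * to 8 requests from a domain per pass and stops queueing new batches once
+ * 32 requests are pending, so roughly four domains' full batches can be
+ * in flight together.
+ */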
+
+static kmem_cache_t *buffer_head_cachep;
+static atomic_t nr_pending;
+
+static void io_schedule(unsigned long unused);
+static int do_block_io_op_domain(struct task_struct *p, int max_to_do);
+static void dispatch_rw_block_io(struct task_struct *p, int index);
+static void dispatch_probe_block_io(struct task_struct *p, int index);
+static void dispatch_debug_block_io(struct task_struct *p, int index);
+static void make_response(struct task_struct *p, void *id, unsigned long st);
+
+
+/******************************************************************
+ * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head io_schedule_list;
+static spinlock_t io_schedule_list_lock;
+
+static int __on_blkdev_list(struct task_struct *p)
+{
+ return p->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(struct task_struct *p)
+{
+ unsigned long flags;
+ if ( !__on_blkdev_list(p) ) return;
+ spin_lock_irqsave(&io_schedule_list_lock, flags);
+ if ( __on_blkdev_list(p) )
+ {
+ list_del(&p->blkdev_list);
+ p->blkdev_list.next = NULL;
+ }
+ spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(struct task_struct *p)
+{
+ unsigned long flags;
+ if ( __on_blkdev_list(p) ) return;
+ spin_lock_irqsave(&io_schedule_list_lock, flags);
+ if ( !__on_blkdev_list(p) )
+ {
+ list_add_tail(&p->blkdev_list, &io_schedule_list);
+ }
+ spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
+
+static void io_schedule(unsigned long unused)
+{
+ struct task_struct *p;
+ struct list_head *ent;
+
+ /* Queue up a batch of requests. */
+ while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
+ !list_empty(&io_schedule_list) )
+ {
+ ent = io_schedule_list.next;
+ p = list_entry(ent, struct task_struct, blkdev_list);
+ remove_from_blkdev_list(p);
+ if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
+ add_to_blkdev_list_tail(p);
+ }
+
+ /* Push the batch through to disc. */
+ run_task_queue(&tq_disk);
+}
+
+static void maybe_trigger_io_schedule(void)
+{
+ /*
+ * Needed so that two processes, which together make the following predicate
+ * true, don't both read stale values and evaluate the predicate
+ * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ */
+ smp_mb();
+
+ if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&io_schedule_list) )
+ {
+ tasklet_schedule(&io_schedule_tasklet);
+ }
+}
+
+
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
+ */
+
+static void end_block_io_op(struct buffer_head *bh, int uptodate)
+{
+ struct pfn_info *page;
+ unsigned long pfn;
+
+ for ( pfn = virt_to_phys(bh->b_data) >> PAGE_SHIFT;
+ pfn < ((virt_to_phys(bh->b_data) + bh->b_size + PAGE_SIZE - 1) >>
+ PAGE_SHIFT);
+ pfn++ )
+ {
+ page = frame_table + pfn;
+ if ( ((bh->b_state & (1 << BH_Read)) != 0) &&
+ (put_page_type(page) == 0) )
+ page->flags &= ~PG_type_mask;
+ put_page_tot(page);
+ }
+
+ atomic_dec(&nr_pending);
+ make_response(bh->b_xen_domain, bh->b_xen_id, uptodate ? 0 : 1);
+
+ kmem_cache_free(buffer_head_cachep, bh);
+
+ maybe_trigger_io_schedule();
+}
+
+
+
+/******************************************************************
+ * GUEST-OS SYSCALL -- Indicates there are requests outstanding.
+ */
+
+long do_block_io_op(void)
+{
+ add_to_blkdev_list_tail(current);
+ maybe_trigger_io_schedule();
+ return 0L;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+
+static int do_block_io_op_domain(struct task_struct* p, int max_to_do)
+{
+ blk_ring_t *blk_ring = p->blk_ring_base;
+ int i, more_to_do = 0;
+
+ for ( i = p->blk_req_cons;
+ i != blk_ring->req_prod;
+ i = BLK_RING_INC(i) )
+ {
+ if ( max_to_do-- == 0 )
+ {
+ more_to_do = 1;
+ break;
+ }
+
+ switch ( blk_ring->ring[i].req.operation )
+ {
+ case XEN_BLOCK_READ:
+ case XEN_BLOCK_WRITE:
+ dispatch_rw_block_io(p, i);
+ break;
+
+ case XEN_BLOCK_PROBE:
+ dispatch_probe_block_io(p, i);
+ break;
+
+ case XEN_BLOCK_DEBUG:
+ dispatch_debug_block_io(p, i);
+ break;
+
+ default:
+ panic("error: unknown block io operation [%d]\n",
+ blk_ring->ring[i].req.operation);
+ }
+ }
+
+ p->blk_req_cons = i;
+ return more_to_do;
+}
+
+static void dispatch_debug_block_io(struct task_struct *p, int index)
+{
+ DPRINTK("dispatch_debug_block_io: unimplemented\n");
+}
+
+static void dispatch_probe_block_io(struct task_struct *p, int index)
+{
+ extern void ide_probe_devices(xen_disk_info_t *xdi);
+ blk_ring_t *blk_ring = p->blk_ring_base;
+ xen_disk_info_t *xdi;
+
+ xdi = phys_to_virt((unsigned long)blk_ring->ring[index].req.buffer);
+ ide_probe_devices(xdi);
+
+ make_response(p, blk_ring->ring[index].req.id, 0);
+}
+
+static void dispatch_rw_block_io(struct task_struct *p, int index)
+{
+ extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
+ blk_ring_t *blk_ring = p->blk_ring_base;
+ struct buffer_head *bh;
+ int operation;
+ unsigned short size;
+ unsigned long buffer, pfn;
+ struct pfn_info *page;
+
+ operation = (blk_ring->ring[index].req.operation == XEN_BLOCK_WRITE) ?
+ WRITE : READ;
+
+ /* Sectors are 512 bytes. Make sure request size is a multiple. */
+ size = blk_ring->ring[index].req.block_size;
+ if ( (size == 0) || (size & (0x200 - 1)) != 0 )
+ {
+ DPRINTK("dodgy block size: %d\n",
+ blk_ring->ring[index].req.block_size);
+ goto bad_descriptor;
+ }
+
+ /* Buffer address should be sector aligned. */
+ buffer = (unsigned long)blk_ring->ring[index].req.buffer;
+ if ( (buffer & (0x200 - 1)) != 0 )
+ {
+ DPRINTK("unaligned buffer %08lx\n", buffer);
+ goto bad_descriptor;
+ }
+
+ /* A request may span multiple page frames. Each must be checked. */
+ for ( pfn = buffer >> PAGE_SHIFT;
+ pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+ pfn++ )
+ {
+ /* Each frame must be within bounds of machine memory. */
+ if ( pfn >= max_page )
+ {
+ DPRINTK("pfn out of range: %08lx\n", pfn);
+ goto bad_descriptor;
+ }
+
+ page = frame_table + pfn;
+
+ /* Each frame must belong to the requesting domain. */
+ if ( (page->flags & PG_domain_mask) != p->domain )
+ {
+ DPRINTK("bad domain: expected %d, got %ld\n",
+ p->domain, page->flags & PG_domain_mask);
+ goto bad_descriptor;
+ }
+
+ /* If reading into the frame, the frame must be writeable. */
+ if ( operation == READ )
+ {
+ if ( (page->flags & PG_type_mask) != PGT_writeable_page )
+ {
+ DPRINTK("non-writeable page passed for block read\n");
+ goto bad_descriptor;
+ }
+ get_page_type(page);
+ }
+
+ /* Xen holds a frame reference until the operation is complete. */
+ get_page_tot(page);
+ }
+
+ atomic_inc(&nr_pending);
+ bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
+ if ( bh == NULL ) panic("bh is null\n");
+
+ /* set just the important bits of the buffer header */
+ memset (bh, 0, sizeof (struct buffer_head));
+
+ bh->b_blocknr = blk_ring->ring[index].req.block_number;
+ bh->b_size = size;
+ bh->b_dev = blk_ring->ring[index].req.device;
+ bh->b_rsector = blk_ring->ring[index].req.sector_number;
+ bh->b_data = phys_to_virt(buffer);
+ bh->b_count.counter = 1;
+ bh->b_end_io = end_block_io_op;
+
+ /* Save meta data about request. */
+ bh->b_xen_domain = p;
+ bh->b_xen_id = blk_ring->ring[index].req.id;
+
+ if ( operation == WRITE )
+ {
+ bh->b_state = (1 << BH_JBD) | (1 << BH_Mapped) | (1 << BH_Req) |
+ (1 << BH_Dirty) | (1 << BH_Uptodate) | (1 << BH_Write);
+ }
+ else
+ {
+ bh->b_state = (1 << BH_Mapped) | (1 << BH_Read);
+ }
+
+ /* Dispatch a single request. We'll flush it to disc later. */
+ ll_rw_block(operation, 1, &bh);
+ return;
+
+ bad_descriptor:
+ make_response(p, blk_ring->ring[index].req.id, 1);
+ return;
+}
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+static void make_response(struct task_struct *p, void *id, unsigned long st)
+{
+ unsigned long cpu_mask, flags;
+ int position;
+ blk_ring_t *blk_ring;
+
+ /* Place on the response ring for the relevant domain. */
+ spin_lock_irqsave(&p->blk_ring_lock, flags);
+ blk_ring = p->blk_ring_base;
+ position = blk_ring->resp_prod;
+ blk_ring->ring[position].resp.id = id;
+ blk_ring->ring[position].resp.status = st;
+ blk_ring->resp_prod = BLK_RING_INC(position);
+ spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+
+ /* Kick the relevant domain. */
+ cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
+ guest_event_notify(cpu_mask);
+}
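+/*
+ * Illustrative guest-side counterpart (an assumption -- the real consumer
+ * lives in the guest OS): responses are consumed up to resp_prod, e.g.
+ */
+#if 0
+ for ( i = resp_cons; i != blk_ring->resp_prod; i = BLK_RING_INC(i) )
+ complete_guest_request(blk_ring->ring[i].resp.id, /* hypothetical */
+ blk_ring->ring[i].resp.status);
+ resp_cons = i;
+#endif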
+
+static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ printk("Dumping block queue stats: nr_pending = %d\n",
+ atomic_read(&nr_pending));
+}
+
+/* Start-of-day initialisation for a new domain. */
+void init_blkdev_info(struct task_struct *p)
+{
+ if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
+ p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
+ clear_page(p->blk_ring_base);
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p->domain);
+ p->blkdev_list.next = NULL;
+}
+
+/* End-of-day teardown for a domain. XXX Outstanding requests? */
+void destroy_blkdev_info(struct task_struct *p)
+{
+ remove_from_blkdev_list(p);
+ UNSHARE_PFN(virt_to_page(p->blk_ring_base));
+ free_page((unsigned long)p->blk_ring_base);
+}
+
+void initialize_block_io(void)
+{
+ atomic_set(&nr_pending, 0);
+
+ spin_lock_init(&io_schedule_list_lock);
+ INIT_LIST_HEAD(&io_schedule_list);
+
+ buffer_head_cachep = kmem_cache_create(
+ "buffer_head_cache", sizeof(struct buffer_head),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
+}
diff --git a/xen/drivers/char/Makefile b/xen/drivers/char/Makefile
new file mode 100644
index 0000000000..574b7d2d79
--- /dev/null
+++ b/xen/drivers/char/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/char/xen_kbd.c b/xen/drivers/char/xen_kbd.c
new file mode 100644
index 0000000000..dc9379a06c
--- /dev/null
+++ b/xen/drivers/char/xen_kbd.c
@@ -0,0 +1,191 @@
+#include <asm-i386/io.h>
+#include <xeno/sched.h> /* this has request_irq() proto for some reason */
+#include <xeno/keyhandler.h>
+
+#define KEYBOARD_IRQ 1
+
+#define KBD_STATUS_REG 0x64 /* Status register (R) */
+#define KBD_CNTL_REG 0x64 /* Controller command register (W) */
+#define KBD_DATA_REG 0x60 /* Keyboard data register (R/W) */
+
+/* register status bits */
+#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */
+#define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */
+#define KBD_STAT_SELFTEST 0x04 /* Self test successful */
+#define KBD_STAT_CMD 0x08 /* Last write was a command write (0=data) */
+
+#define KBD_STAT_UNLOCKED 0x10 /* Zero if keyboard locked */
+#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */
+#define KBD_STAT_GTO 0x40 /* General receive/xmit timeout */
+#define KBD_STAT_PERR 0x80 /* Parity error */
+
+#define kbd_read_input() inb(KBD_DATA_REG)
+#define kbd_read_status() inb(KBD_STATUS_REG)
+
+
+static int keyboard_shift = 0;
+static int keyboard_control = 0;
+static int keyboard_echo = 0;
+
+/* the following is pretty gross...
+ * stop reading if you don't want to throw up!
+ */
+
+static unsigned char keymap_normal[] =
+{
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 ,'1','2', '3','4','5','6', '7','8','9','0', '-','=','\b','\t',
+ 'q','w','e','r', 't','y','u','i', 'o','p','[',']','\r', 0 ,'a','s',
+ 'd','f','g','h', 'j','k','l',';', '\'','`', 0 ,'#', 'z','x','c','v',
+ 'b','n','m',',', '.','/', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 ,'\\', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+};
+
+static unsigned char keymap_shift[] =
+{
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 ,'!','"', '#','$','%','^', '&','*','(',')', '_','+','\b','\t',
+ 'Q','W','E','R', 'T','Y','U','I', 'O','P','{','}','\r', 0 ,'A','S',
+ 'D','F','G','H', 'J','K','L',':', '@', 0 , 0 ,'~', 'Z','X','C','V',
+ 'B','N','M','<', '>','?', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 ,'|', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+};
+
+
+static unsigned char keymap_control[] =
+{ /* same as normal, except for a-z -> 1 to 26 */
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 ,'1','2', '3','4','5','6', '7','8','9','0', '-','=','\b','\t',
+ 17, 23, 5 , 18, 20, 25, 21, 9 , 15, 16,'[',']','\r', 0 , 1 , 19,
+ 4 , 6 , 7 , 8 , 10, 11, 12,';', '\'','`', 0 ,'#', 26, 24, 3 , 22,
+ 2 , 14, 13,',', '.','/', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 ,'\\', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+};
+
+
+static unsigned char convert_scancode (unsigned char scancode)
+{
+ unsigned char value = 0;
+
+ switch (scancode) {
+
+ case 0xbb: /* F1 */
+ keyboard_echo = !keyboard_echo;
+ break;
+
+ case 0xba: /* caps lock UP */
+ case 0x9d: /* ctrl (left) UP */
+ keyboard_control = 0;
+ break;
+
+ case 0x3a: /* caps lock DOWN */
+ case 0x1d: /* ctrl (left) DOWN */
+ keyboard_control = 1;
+ break;
+
+ case 0xaa: /* shift (left) UP */
+ case 0xb6: /* shift (right) UP */
+ keyboard_shift = 0;
+ break;
+
+ case 0x2a: /* shift (left) DOWN */
+ case 0x36: /* shift (right) DOWN */
+ keyboard_shift = 1;
+ break;
+
+ default: /* normal keys */
+ if (keyboard_control)
+ value = keymap_control[scancode];
+ else if (keyboard_shift)
+ value = keymap_shift[scancode];
+ else
+ value = keymap_normal[scancode];
+
+ }
+
+ if (value && keyboard_echo) printk ("%c", value);
+
+ return value;
+}
+
+static void keyboard_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned char status = kbd_read_status();
+ unsigned int work = 10000;
+
+ while ((--work > 0) && (status & KBD_STAT_OBF))
+ {
+ unsigned char scancode;
+ scancode = kbd_read_input();
+
+ if (!(status & (KBD_STAT_GTO | KBD_STAT_PERR)))
+ {
+ if (status & KBD_STAT_MOUSE_OBF)
+ /* mouse event, ignore */;
+ else {
+ unsigned char key;
+ key_handler *handler;
+
+ if((key = convert_scancode (scancode)) &&
+ (handler = get_key_handler(key)))
+ (*handler)(key, dev_id, regs);
+
+ }
+ }
+ status = kbd_read_status();
+ }
+
+ if (!work)
+ printk(KERN_ERR "pc_keyb: controller jammed (0x%02X).\n", status);
+
+ return;
+}
+
+
+void initialize_keyboard()
+{
+ if(request_irq(KEYBOARD_IRQ, keyboard_interrupt, 0, "keyboard", NULL))
+ printk("initialize_keyboard: failed to alloc IRQ %d\n", KEYBOARD_IRQ);
+
+ return;
+}
+
diff --git a/xen/drivers/char/xen_serial.c b/xen/drivers/char/xen_serial.c
new file mode 100644
index 0000000000..7b7e4a7b80
--- /dev/null
+++ b/xen/drivers/char/xen_serial.c
@@ -0,0 +1,103 @@
+#include <asm-i386/io.h>
+#include <xeno/sched.h> /* this has request_irq() proto for some reason */
+#include <xeno/keyhandler.h>
+#include <xeno/reboot.h>
+
+/* Register offsets */
+#define NS16550_RBR 0x00 /* receive buffer */
+#define NS16550_THR 0x00 /* transmit holding */
+#define NS16550_IER 0x01 /* interrupt enable */
+#define NS16550_IIR 0x02 /* interrupt identity */
+#define NS16550_FCR 0x02 /* FIFO control */
+#define NS16550_LCR 0x03 /* line control */
+#define NS16550_MCR 0x04 /* MODEM control */
+#define NS16550_LSR 0x05 /* line status */
+#define NS16550_MSR 0x06 /* MODEM status */
+#define NS16550_SCR 0x07 /* scratch */
+#define NS16550_DDL 0x00 /* divisor latch (ls) ( DLAB=1) */
+#define NS16550_DLM 0x01 /* divisor latch (ms) ( DLAB=1) */
+
+/* Interrupt enable register */
+#define NS16550_IER_ERDAI 0x01 /* rx data recv'd */
+#define NS16550_IER_ETHREI 0x02 /* tx reg. empty */
+#define NS16550_IER_ELSI 0x04 /* rx line status */
+#define NS16550_IER_EMSI 0x08 /* MODEM status */
+
+/* FIFO control register */
+#define NS16550_FCR_ENABLE 0x01 /* enable FIFO */
+#define NS16550_FCR_CLRX 0x02 /* clear Rx FIFO */
+#define NS16550_FCR_CLTX 0x04 /* clear Tx FIFO */
+#define NS16550_FCR_DMA 0x10 /* enter DMA mode */
+#define NS16550_FCR_TRG1 0x00 /* Rx FIFO trig lev 1 */
+#define NS16550_FCR_TRG4 0x40 /* Rx FIFO trig lev 4 */
+#define NS16550_FCR_TRG8 0x80 /* Rx FIFO trig lev 8 */
+#define NS16550_FCR_TRG14 0xc0 /* Rx FIFO trig lev 14 */
+
+/* MODEM control register */
+#define NS16550_MCR_DTR 0x01 /* Data Terminal Ready */
+#define NS16550_MCR_RTS 0x02 /* Request to Send */
+#define NS16550_MCR_OUT1 0x04 /* OUT1: unused */
+#define NS16550_MCR_OUT2 0x08 /* OUT2: interrupt mask */
+#define NS16550_MCR_LOOP 0x10 /* Loop */
+
+#define SERIAL_BASE 0x3f8 /* XXX SMH: horrible hardwired COM1 */
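+/*
+ * Illustrative note (not done here -- we rely on whatever the firmware
+ * programmed): setting the baud rate means raising DLAB (bit 7 of LCR),
+ * writing the divisor 115200/baud to DDL/DLM, then clearing DLAB, e.g.
+ */
+#if 0
+ outb(0x80, SERIAL_BASE + NS16550_LCR); /* DLAB=1 */
+ outb(115200/9600, SERIAL_BASE + NS16550_DDL); /* divisor low: 12 */
+ outb(0x00, SERIAL_BASE + NS16550_DLM); /* divisor high */
+ outb(0x03, SERIAL_BASE + NS16550_LCR); /* DLAB=0, 8N1 */
+#endif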
+
+
+
+static int serial_echo = 0; /* default is not to echo; change with 'e' */
+
+void toggle_echo(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ serial_echo = !serial_echo;
+ return;
+}
+
+
+
+static void serial_rx_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+ u_char c;
+ key_handler *handler;
+
+ /* XXX SMH: should probably check this is an RX interrupt :-) */
+
+ /* clear the interrupt by reading the character */
+ c = inb(SERIAL_BASE + NS16550_RBR );
+
+ /* if there's a handler, call it: we trust it won't screw us too badly */
+ if((handler = get_key_handler(c)) != NULL)
+ (*handler)(c, dev_id, regs);
+
+ if(serial_echo)
+ printk("%c", c);
+
+ return;
+}
+
+void initialize_serial()
+{
+ int fifo, rc;
+
+ /* setup key handler */
+ add_key_handler('~', toggle_echo, "toggle serial echo");
+
+ /* Should detect this, but must be a ns16550a at least, surely? */
+ fifo = 1;
+ if(fifo) {
+ /* Clear FIFOs, enable, trigger at 1 byte */
+ outb(NS16550_FCR_TRG1 | NS16550_FCR_ENABLE |
+ NS16550_FCR_CLRX | NS16550_FCR_CLTX, SERIAL_BASE+NS16550_FCR);
+ }
+
+ outb(NS16550_MCR_OUT2, SERIAL_BASE + NS16550_MCR); /* Modem control */
+ outb(NS16550_IER_ERDAI, SERIAL_BASE + NS16550_IER ); /* Setup interrupts */
+
+ /* XXX SMH: this is a hack; probably is IRQ4 but grab both anyway */
+ if((rc = request_irq(4, serial_rx_int, 0, "serial", (void *)0x1234)))
+ printk("initialize_serial: failed to get IRQ4, rc=%d\n", rc);
+
+ if((rc = request_irq(3, serial_rx_int, 0, "serial", (void *)0x1234)))
+ printk("initialize_serial: failed to get IRQ3, rc=%d\n", rc);
+
+ return;
+}
diff --git a/xen/drivers/ide/Makefile b/xen/drivers/ide/Makefile
new file mode 100644
index 0000000000..574b7d2d79
--- /dev/null
+++ b/xen/drivers/ide/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/ide/ide-disk.c b/xen/drivers/ide/ide-disk.c
new file mode 100644
index 0000000000..0d1cd113cd
--- /dev/null
+++ b/xen/drivers/ide/ide-disk.c
@@ -0,0 +1,1555 @@
+/*
+ * linux/drivers/ide/ide-disk.c Version 1.10 June 9, 2000
+ *
+ * Copyright (C) 1994-1998 Linus Torvalds & authors (see below)
+ */
+
+/*
+ * Mostly written by Mark Lord <mlord@pobox.com>
+ * and Gadi Oxman <gadio@netvision.net.il>
+ * and Andre Hedrick <andre@linux-ide.org>
+ *
+ * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
+ *
+ * Version 1.00 move disk only code from ide.c to ide-disk.c
+ * support optional byte-swapping of all data
+ * Version 1.01 fix previous byte-swapping code
+ * Version 1.02 remove ", LBA" from drive identification msgs
+ * Version 1.03 fix display of id->buf_size for big-endian
+ * Version 1.04 add /proc configurable settings and S.M.A.R.T support
+ * Version 1.05 add capacity support for ATA3 >= 8GB
+ * Version 1.06 get boot-up messages to show full cyl count
+ * Version 1.07 disable door-locking if it fails
+ * Version 1.08 fixed CHS/LBA translations for ATA4 > 8GB,
+ * process of adding new ATA4 compliance.
+ * fixed problems in allowing fdisk to see
+ * the entire disk.
+ * Version 1.09 added increment of rq->sector in ide_multwrite
+ * added UDMA 3/4 reporting
+ * Version 1.10 request queue changes, Ultra DMA 100
+ * Version 1.11 added 48-bit lba
+ * Version 1.12 adding taskfile io access method
+ */
+
+#define IDEDISK_VERSION "1.12"
+
+#undef REALLY_SLOW_IO /* most systems can safely undef this */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/slab.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+#define IS_PDC4030_DRIVE (HWIF(drive)->chipset == ide_pdc4030)
+#else
+#define IS_PDC4030_DRIVE (0) /* auto-NULLs out pdc4030 code */
+#endif
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+# undef __TASKFILE__IO /* define __TASKFILE__IO */
+#else /* CONFIG_IDE_TASKFILE_IO */
+# undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+#ifndef __TASKFILE__IO
+
+static void idedisk_bswap_data (void *buffer, int wcount)
+{
+ u16 *p = buffer;
+
+ while (wcount--) {
+ *p = *p << 8 | *p >> 8; p++;
+ *p = *p << 8 | *p >> 8; p++;
+ }
+}
+
+static inline void idedisk_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+ ide_input_data(drive, buffer, wcount);
+ if (drive->bswap)
+ idedisk_bswap_data(buffer, wcount);
+}
+
+static inline void idedisk_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+ if (drive->bswap) {
+ idedisk_bswap_data(buffer, wcount);
+ ide_output_data(drive, buffer, wcount);
+ idedisk_bswap_data(buffer, wcount);
+ } else
+ ide_output_data(drive, buffer, wcount);
+}
+
+#endif /* __TASKFILE__IO */
+
+/*
+ * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity"
+ * value for this drive (from its reported identification information).
+ *
+ * Returns: 1 if lba_capacity looks sensible
+ * 0 otherwise
+ *
+ * It is called only once for each drive.
+ */
+static int lba_capacity_is_ok (struct hd_driveid *id)
+{
+ unsigned long lba_sects, chs_sects, head, tail;
+
+ if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) {
+ printk("48-bit Drive: %llu \n", id->lba_capacity_2);
+ return 1;
+ }
+
+ /*
+ * The ATA spec tells large drives to return
+ * C/H/S = 16383/16/63 independent of their size.
+ * Some drives can be jumpered to use 15 heads instead of 16.
+ * Some drives can be jumpered to use 4092 cyls instead of 16383.
+ */
+ if ((id->cyls == 16383
+ || (id->cyls == 4092 && id->cur_cyls == 16383)) &&
+ id->sectors == 63 &&
+ (id->heads == 15 || id->heads == 16) &&
+ id->lba_capacity >= 16383*63*id->heads)
+ return 1;
+
+ lba_sects = id->lba_capacity;
+ chs_sects = id->cyls * id->heads * id->sectors;
+
+ /* perform a rough sanity check on lba_sects: within 10% is OK */
+ if ((lba_sects - chs_sects) < chs_sects/10)
+ return 1;
+
+ /* some drives have the word order reversed */
+ head = ((lba_sects >> 16) & 0xffff);
+ tail = (lba_sects & 0xffff);
+ lba_sects = (head | (tail << 16));
+ if ((lba_sects - chs_sects) < chs_sects/10) {
+ id->lba_capacity = lba_sects;
+ return 1; /* lba_capacity is (now) good */
+ }
+
+ return 0; /* lba_capacity value may be bad */
+}
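+/*
+ * Worked example of the word-swap fixup above (illustrative numbers): a
+ * 1,008,000-sector drive (1000/16/63 CHS) whose firmware returns the two
+ * 16-bit words reversed reports lba_capacity 0x6180000F; swapping the
+ * halves recovers 0x000F6180 == 1,008,000, which passes the 10% check.
+ */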
+
+#ifndef __TASKFILE__IO
+
+/*
+ * read_intr() is the handler for disk read/multread interrupts
+ */
+static ide_startstop_t read_intr (ide_drive_t *drive)
+{
+ byte stat;
+ int i;
+ unsigned int msect, nsect;
+ struct request *rq;
+
+ /* new way for dealing with premature shared PCI interrupts */
+ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) {
+ if (stat & (ERR_STAT|DRQ_STAT)) {
+ return ide_error(drive, "read_intr", stat);
+ }
+ /* no data yet, so wait for another interrupt */
+ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ msect = drive->mult_count;
+
+read_next:
+ rq = HWGROUP(drive)->rq;
+ if (msect) {
+ if ((nsect = rq->current_nr_sectors) > msect)
+ nsect = msect;
+ msect -= nsect;
+ } else
+ nsect = 1;
+ idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS);
+#ifdef DEBUG
+ printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n",
+ drive->name, rq->sector, rq->sector+nsect-1,
+ (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect);
+#endif
+ rq->sector += nsect;
+ rq->buffer += nsect<<9;
+ rq->errors = 0;
+ i = (rq->nr_sectors -= nsect);
+ if (((long)(rq->current_nr_sectors -= nsect)) <= 0)
+ ide_end_request(1, HWGROUP(drive));
+ if (i > 0) {
+ if (msect)
+ goto read_next;
+ ide_set_handler (drive, &read_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+}
+
+/*
+ * write_intr() is the handler for disk write interrupts
+ */
+static ide_startstop_t write_intr (ide_drive_t *drive)
+{
+ byte stat;
+ int i;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ struct request *rq = hwgroup->rq;
+
+ if (!OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+ printk("%s: write_intr error1: nr_sectors=%ld, stat=0x%02x\n", drive->name, rq->nr_sectors, stat);
+ } else {
+#ifdef DEBUG
+ printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n",
+ drive->name, rq->sector, (unsigned long) rq->buffer,
+ rq->nr_sectors-1);
+#endif
+ if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) {
+ rq->sector++;
+ rq->buffer += 512;
+ rq->errors = 0;
+ i = --rq->nr_sectors;
+ --rq->current_nr_sectors;
+ if (((long)rq->current_nr_sectors) <= 0)
+ ide_end_request(1, hwgroup);
+ if (i > 0) {
+ idedisk_output_data (drive, rq->buffer, SECTOR_WORDS);
+ ide_set_handler (drive, &write_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+ }
+ return ide_stopped; /* the original code did this here (?) */
+ }
+ return ide_error(drive, "write_intr", stat);
+}
+
+/*
+ * ide_multwrite() transfers a block of up to mcount sectors of data
+ * to a drive as part of a disk multiple-sector write operation.
+ *
+ * Returns 0 on success.
+ *
+ * Note that we may be called from two contexts - the do_rw_disk context
+ * and IRQ context. The IRQ can happen any time after we've output the
+ * full "mcount" number of sectors, so we must make sure we update the
+ * state _before_ we output the final part of the data!
+ */
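+/*
+ * Concretely (hypothetical request): with mcount == 4 and two 2-sector
+ * buffer heads queued, each loop iteration advances rq->sector, rq->buffer,
+ * rq->nr_sectors and rq->current_nr_sectors *before* idedisk_output_data()
+ * pushes that chunk to the drive, so an interrupt taken right after the
+ * final transfer sees fully consistent request state.
+ */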
+int ide_multwrite (ide_drive_t *drive, unsigned int mcount)
+{
+ ide_hwgroup_t *hwgroup= HWGROUP(drive);
+ struct request *rq = &hwgroup->wrq;
+
+ do {
+ char *buffer;
+ int nsect = rq->current_nr_sectors;
+
+ if (nsect > mcount)
+ nsect = mcount;
+ mcount -= nsect;
+ buffer = rq->buffer;
+
+ rq->sector += nsect;
+ rq->buffer += nsect << 9;
+ rq->nr_sectors -= nsect;
+ rq->current_nr_sectors -= nsect;
+
+ /* Do we move to the next bh after this? */
+ if (!rq->current_nr_sectors) {
+ struct buffer_head *bh = rq->bh->b_reqnext;
+
+			/* end early if we ran out of requests */
+ if (!bh) {
+ mcount = 0;
+ } else {
+ rq->bh = bh;
+ rq->current_nr_sectors = bh->b_size >> 9;
+ rq->buffer = bh->b_data;
+ }
+ }
+
+ /*
+		 * OK, we're all set up for the interrupt
+		 * re-entering us on the last transfer.
+ */
+ idedisk_output_data(drive, buffer, nsect<<7);
+ } while (mcount);
+
+ return 0;
+}
+
+/*
+ * multwrite_intr() is the handler for disk multwrite interrupts
+ */
+static ide_startstop_t multwrite_intr (ide_drive_t *drive)
+{
+ byte stat;
+ int i;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ struct request *rq = &hwgroup->wrq;
+
+ if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+ if (stat & DRQ_STAT) {
+ /*
+ * The drive wants data. Remember rq is the copy
+ * of the request
+ */
+ if (rq->nr_sectors) {
+ if (ide_multwrite(drive, drive->mult_count))
+ return ide_stopped;
+ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ } else {
+ /*
+ * If the copy has all the blocks completed then
+ * we can end the original request.
+ */
+ if (!rq->nr_sectors) { /* all done? */
+ rq = hwgroup->rq;
+ for (i = rq->nr_sectors; i > 0;){
+ i -= rq->current_nr_sectors;
+ ide_end_request(1, hwgroup);
+ }
+ return ide_stopped;
+ }
+ }
+ return ide_stopped; /* the original code did this here (?) */
+ }
+ return ide_error(drive, "multwrite_intr", stat);
+}
+#endif /* __TASKFILE__IO */
+
+#ifdef __TASKFILE__IO
+
+static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block);
+
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ */
+static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ if (rq->cmd == READ)
+ goto good_command;
+ if (rq->cmd == WRITE)
+ goto good_command;
+
+ printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+
+good_command:
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (IS_PDC4030_DRIVE) {
+ extern ide_startstop_t promise_rw_disk(ide_drive_t *, struct request *, unsigned long);
+ return promise_rw_disk(drive, rq, block);
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) /* 48-bit LBA */
+ return lba_48_rw_disk(drive, rq, (unsigned long long) block);
+ if (drive->select.b.lba) /* 28-bit LBA */
+ return lba_28_rw_disk(drive, rq, (unsigned long) block);
+
+ /* 28-bit CHS : DIE DIE DIE piece of legacy crap!!! */
+ return chs_rw_disk(drive, rq, (unsigned long) block);
+}
+
+static task_ioreg_t get_command (ide_drive_t *drive, int cmd)
+{
+ int lba48bit = (drive->id->cfs_enable_2 & 0x0400) ? 1 : 0;
+
+#if 1
+ lba48bit = drive->addressing;
+#endif
+
+ if ((cmd == READ) && (drive->using_dma))
+ return (lba48bit) ? WIN_READDMA_EXT : WIN_READDMA;
+ else if ((cmd == READ) && (drive->mult_count))
+ return (lba48bit) ? WIN_MULTREAD_EXT : WIN_MULTREAD;
+ else if (cmd == READ)
+ return (lba48bit) ? WIN_READ_EXT : WIN_READ;
+ else if ((cmd == WRITE) && (drive->using_dma))
+ return (lba48bit) ? WIN_WRITEDMA_EXT : WIN_WRITEDMA;
+ else if ((cmd == WRITE) && (drive->mult_count))
+ return (lba48bit) ? WIN_MULTWRITE_EXT : WIN_MULTWRITE;
+ else if (cmd == WRITE)
+ return (lba48bit) ? WIN_WRITE_EXT : WIN_WRITE;
+ else
+ return WIN_NOP;
+}
+
+static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_task_t args;
+
+ task_ioreg_t command = get_command(drive, rq->cmd);
+ unsigned int track = (block / drive->sect);
+ unsigned int sect = (block % drive->sect) + 1;
+ unsigned int head = (track % drive->head);
+ unsigned int cyl = (track / drive->head);
+
+ memset(&taskfile, 0, sizeof(task_struct_t));
+ memset(&hobfile, 0, sizeof(hob_struct_t));
+
+ taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors;
+ taskfile.sector_number = sect;
+ taskfile.low_cylinder = cyl;
+ taskfile.high_cylinder = (cyl>>8);
+ taskfile.device_head = head;
+ taskfile.device_head |= drive->select.all;
+ taskfile.command = command;
+
+	/* #ifdef DEBUG */
+	printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+	printk("CHS=%d/%d/%d, ", cyl, head, sect);
+	printk("sectors=%ld, ", rq->nr_sectors);
+	printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+	/* #endif*/
+
+ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+ memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+ args.command_type = ide_cmd_type_parser(&args);
+ args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile);
+ args.handler = ide_handler_parser(&taskfile, &hobfile);
+ args.posthandler = NULL;
+ args.rq = (struct request *) rq;
+ args.block = block;
+	rq->special = (ide_task_t *)&args;
+
+ return do_rw_taskfile(drive, &args);
+}
+
+static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_task_t args;
+
+ task_ioreg_t command = get_command(drive, rq->cmd);
+
+ memset(&taskfile, 0, sizeof(task_struct_t));
+ memset(&hobfile, 0, sizeof(hob_struct_t));
+
+ taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors;
+ taskfile.sector_number = block;
+ taskfile.low_cylinder = (block>>=8);
+ taskfile.high_cylinder = (block>>=8);
+ taskfile.device_head = ((block>>8)&0x0f);
+ taskfile.device_head |= drive->select.all;
+ taskfile.command = command;
+
+#ifdef DEBUG
+	printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+	printk("LBAsect=%ld, ", rq->sector);
+	printk("sectors=%ld, ", rq->nr_sectors);
+	printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+#endif
+
+ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+ memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+ args.command_type = ide_cmd_type_parser(&args);
+ args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile);
+ args.handler = ide_handler_parser(&taskfile, &hobfile);
+ args.posthandler = NULL;
+ args.rq = (struct request *) rq;
+ args.block = block;
+	rq->special = (ide_task_t *)&args;
+
+ return do_rw_taskfile(drive, &args);
+}
+
+/*
+ * 268435455 == 137439 MB or 28bit limit
+ * 320173056 == 163929 MB or 48bit addressing
+ * 1073741822 == 549756 MB or 48bit addressing fake drive
+ */
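+
+/*
+ * Arithmetic check (512-byte sectors, decimal MB):
+ *    268435455 = 2^28 - 1 sectors -> * 512 = 137438952960 bytes ~= 137439 MB
+ *    320173056 sectors            -> * 512 = 163928604672 bytes ~= 163929 MB
+ *   1073741822 = 2^30 - 2 sectors -> * 512 = 549755812864 bytes ~= 549756 MB
+ */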
+
+static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_task_t args;
+
+ task_ioreg_t command = get_command(drive, rq->cmd);
+
+ memset(&taskfile, 0, sizeof(task_struct_t));
+ memset(&hobfile, 0, sizeof(hob_struct_t));
+
+ taskfile.sector_count = rq->nr_sectors;
+ hobfile.sector_count = (rq->nr_sectors>>8);
+
+ if (rq->nr_sectors == 65536) {
+ taskfile.sector_count = 0x00;
+ hobfile.sector_count = 0x00;
+ }
+
+ taskfile.sector_number = block; /* low lba */
+ taskfile.low_cylinder = (block>>=8); /* mid lba */
+ taskfile.high_cylinder = (block>>=8); /* hi lba */
+ hobfile.sector_number = (block>>=8); /* low lba */
+ hobfile.low_cylinder = (block>>=8); /* mid lba */
+ hobfile.high_cylinder = (block>>=8); /* hi lba */
+ taskfile.device_head = drive->select.all;
+ hobfile.device_head = taskfile.device_head;
+ hobfile.control = (drive->ctl|0x80);
+ taskfile.command = command;
+
+#ifdef DEBUG
+	printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+	printk("LBAsect=%ld, ", rq->sector);
+	printk("sectors=%ld, ", rq->nr_sectors);
+	printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+#endif
+
+ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+ memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+ args.command_type = ide_cmd_type_parser(&args);
+ args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile);
+ args.handler = ide_handler_parser(&taskfile, &hobfile);
+ args.posthandler = NULL;
+ args.rq = (struct request *) rq;
+ args.block = block;
+	rq->special = (ide_task_t *)&args;
+
+ return do_rw_taskfile(drive, &args);
+}
+
+#else /* !__TASKFILE__IO */
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ */
+static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (drive->select.b.lba || IS_PDC4030_DRIVE) {
+#else /* !CONFIG_BLK_DEV_PDC4030 */
+ if (drive->select.b.lba) {
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+ task_ioreg_t tasklets[10];
+
+ tasklets[0] = 0;
+ tasklets[1] = 0;
+ tasklets[2] = rq->nr_sectors;
+ tasklets[3] = (rq->nr_sectors>>8);
+ if (rq->nr_sectors == 65536) {
+ tasklets[2] = 0x00;
+ tasklets[3] = 0x00;
+ }
+ tasklets[4] = (task_ioreg_t) block;
+ tasklets[5] = (task_ioreg_t) (block>>8);
+ tasklets[6] = (task_ioreg_t) (block>>16);
+ tasklets[7] = (task_ioreg_t) (block>>24);
+ tasklets[8] = (task_ioreg_t) 0;
+ tasklets[9] = (task_ioreg_t) 0;
+// tasklets[8] = (task_ioreg_t) (block>>32);
+// tasklets[9] = (task_ioreg_t) (block>>40);
+ /*#ifdef DEBUG */
+ printk("[A]\n");
+ printk("%s: %sing: LBAsect=%lu, sectors=%ld, buffer=0x%08lx, LBAsect=0x%012lx\n",
+ drive->name,
+ (rq->cmd==READ)?"read":"writ",
+ block,
+ rq->nr_sectors,
+ (unsigned long) rq->buffer,
+ block);
+ printk("%s: 0x%02x%02x 0x%02x%02x%02x%02x%02x%02x\n",
+ drive->name, tasklets[3], tasklets[2],
+ tasklets[9], tasklets[8], tasklets[7],
+ tasklets[6], tasklets[5], tasklets[4]);
+ /* #endif */
+ OUT_BYTE(tasklets[1], IDE_FEATURE_REG);
+ OUT_BYTE(tasklets[3], IDE_NSECTOR_REG);
+ OUT_BYTE(tasklets[7], IDE_SECTOR_REG);
+ OUT_BYTE(tasklets[8], IDE_LCYL_REG);
+ OUT_BYTE(tasklets[9], IDE_HCYL_REG);
+
+ OUT_BYTE(tasklets[0], IDE_FEATURE_REG);
+ OUT_BYTE(tasklets[2], IDE_NSECTOR_REG);
+ OUT_BYTE(tasklets[4], IDE_SECTOR_REG);
+ OUT_BYTE(tasklets[5], IDE_LCYL_REG);
+ OUT_BYTE(tasklets[6], IDE_HCYL_REG);
+ OUT_BYTE(0x00|drive->select.all,IDE_SELECT_REG);
+ } else {
+#ifdef DEBUG
+ printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n",
+ drive->name, (rq->cmd==READ)?"read":"writ",
+ block, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+ OUT_BYTE(0x00, IDE_FEATURE_REG);
+ OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG);
+ OUT_BYTE(block,IDE_SECTOR_REG);
+ OUT_BYTE(block>>=8,IDE_LCYL_REG);
+ OUT_BYTE(block>>=8,IDE_HCYL_REG);
+ OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG);
+ }
+ } else {
+ unsigned int sect,head,cyl,track;
+ track = block / drive->sect;
+ sect = block % drive->sect + 1;
+ OUT_BYTE(sect,IDE_SECTOR_REG);
+ head = track % drive->head;
+ cyl = track / drive->head;
+
+ OUT_BYTE(0x00, IDE_FEATURE_REG);
+ OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG);
+ OUT_BYTE(cyl,IDE_LCYL_REG);
+ OUT_BYTE(cyl>>8,IDE_HCYL_REG);
+ OUT_BYTE(head|drive->select.all,IDE_SELECT_REG);
+#ifdef DEBUG
+ printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n",
+ drive->name, (rq->cmd==READ)?"read":"writ", cyl,
+ head, sect, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+ }
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (IS_PDC4030_DRIVE) {
+ extern ide_startstop_t do_pdc4030_io(ide_drive_t *, struct request *);
+ return do_pdc4030_io (drive, rq);
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ if (rq->cmd == READ) {
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive)))
+ return ide_started;
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+ OUT_BYTE(drive->mult_count ? WIN_MULTREAD_EXT : WIN_READ_EXT, IDE_COMMAND_REG);
+ } else {
+ OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG);
+ }
+ return ide_started;
+ }
+ if (rq->cmd == WRITE) {
+ ide_startstop_t startstop;
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive)))
+ return ide_started;
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+ OUT_BYTE(drive->mult_count ? WIN_MULTWRITE_EXT : WIN_WRITE_EXT, IDE_COMMAND_REG);
+ } else {
+ OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG);
+ }
+ if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+ printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name,
+ drive->mult_count ? "MULTWRITE" : "WRITE");
+ return startstop;
+ }
+ if (!drive->unmask)
+ __cli(); /* local CPU only */
+ if (drive->mult_count) {
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ /*
+ * Ugh.. this part looks ugly because we MUST set up
+ * the interrupt handler before outputting the first block
+ * of data to be written. If we hit an error (corrupted buffer list)
+ * in ide_multwrite(), then we need to remove the handler/timer
+ * before returning. Fortunately, this NEVER happens (right?).
+ *
+ * Except when you get an error it seems...
+ */
+ hwgroup->wrq = *rq; /* scratchpad */
+ ide_set_handler(drive, &multwrite_intr, WAIT_CMD, NULL);
+ if (ide_multwrite(drive, drive->mult_count)) {
+ unsigned long flags;
+ spin_lock_irqsave(&io_request_lock, flags);
+ hwgroup->handler = NULL;
+ del_timer(&hwgroup->timer);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return ide_stopped;
+ }
+ } else {
+ ide_set_handler (drive, &write_intr, WAIT_CMD, NULL);
+ idedisk_output_data(drive, rq->buffer, SECTOR_WORDS);
+ }
+ return ide_started;
+ }
+ printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+}
+
+#endif /* __TASKFILE__IO */
+
+static int idedisk_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+ MOD_INC_USE_COUNT;
+ if (drive->removable && drive->usage == 1) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.command = WIN_DOORLOCK;
+ check_disk_change(inode->i_rdev);
+ /*
+ * Ignore the return code from door_lock,
+ * since the open() has already succeeded,
+ * and the door_lock is irrelevant at this point.
+ */
+ if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+ drive->doorlocking = 0;
+ }
+ return 0;
+}
+
+static int do_idedisk_flushcache(ide_drive_t *drive);
+
+static void idedisk_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+ if (drive->removable && !drive->usage) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.command = WIN_DOORUNLOCK;
+ invalidate_bdev(inode->i_bdev, 0);
+ if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+ drive->doorlocking = 0;
+ }
+ if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache)
+ if (do_idedisk_flushcache(drive))
+ printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n",
+ drive->name);
+ MOD_DEC_USE_COUNT;
+}
+
+static int idedisk_media_change (ide_drive_t *drive)
+{
+ return drive->removable; /* if removable, always assume it was changed */
+}
+
+static void idedisk_revalidate (ide_drive_t *drive)
+{
+ grok_partitions(HWIF(drive)->gd, drive->select.b.unit,
+ 1<<PARTN_BITS,
+ current_capacity(drive));
+}
+
+/*
+ * Queries the drive for its true maximum capacity.
+ * Returns the maximum LBA address (> 0) of the drive, or 0 on failure.
+ */
+static unsigned long idedisk_read_native_max_address(ide_drive_t *drive)
+{
+ ide_task_t args;
+ unsigned long addr = 0;
+
+ if (!(drive->id->command_set_1 & 0x0400) &&
+ !(drive->id->cfs_enable_2 & 0x0100))
+ return addr;
+
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+ args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX;
+ args.handler = task_no_data_intr;
+
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+
+ /* if OK, compute maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ addr = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+ | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16)
+ | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8)
+ | ((args.tfRegister[IDE_SECTOR_OFFSET] ));
+ }
+ addr++; /* since the return value is (maxlba - 1), we add 1 */
+ return addr;
+}
+
+static unsigned long long idedisk_read_native_max_address_ext(ide_drive_t *drive)
+{
+ ide_task_t args;
+ unsigned long long addr = 0;
+
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+
+ args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX_EXT;
+ args.handler = task_no_data_intr;
+
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+
+ /* if OK, compute maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
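+		/* tfRegister holds LBA bits 0..23, hobRegister bits 24..47 */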
+ u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+ ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+ (args.hobRegister[IDE_SECTOR_OFFSET_HOB]);
+ u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+ ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+ (args.tfRegister[IDE_SECTOR_OFFSET]);
+ addr = ((__u64)high << 24) | low;
+ }
+ addr++; /* since the return value is (maxlba - 1), we add 1 */
+ return addr;
+}
+
+#ifdef CONFIG_IDEDISK_STROKE
+/*
+ * Sets the maximum virtual LBA address of the drive.
+ * Returns the new maximum virtual LBA address (> 0), or 0 on failure.
+ */
+static unsigned long idedisk_set_max_address(ide_drive_t *drive, unsigned long addr_req)
+{
+ ide_task_t args;
+ unsigned long addr_set = 0;
+
+ addr_req--;
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+ args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff);
+ args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >> 8) & 0xff);
+ args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >> 16) & 0xff);
+ args.tfRegister[IDE_SELECT_OFFSET] = ((addr_req >> 24) & 0x0f) | 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX;
+ args.handler = task_no_data_intr;
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+ /* if OK, read new maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ addr_set = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+ | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16)
+ | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8)
+ | ((args.tfRegister[IDE_SECTOR_OFFSET] ));
+ }
+ addr_set++;
+ return addr_set;
+}
+
+static unsigned long long idedisk_set_max_address_ext(ide_drive_t *drive, unsigned long long addr_req)
+{
+ ide_task_t args;
+ unsigned long long addr_set = 0;
+
+ addr_req--;
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+ args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff);
+ args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >>= 8) & 0xff);
+ args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >>= 8) & 0xff);
+ args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX_EXT;
+ args.hobRegister[IDE_SECTOR_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+ args.hobRegister[IDE_LCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+ args.hobRegister[IDE_HCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+ args.hobRegister[IDE_SELECT_OFFSET_HOB] = 0x40;
+ args.hobRegister[IDE_CONTROL_OFFSET_HOB]= (drive->ctl|0x80);
+ args.handler = task_no_data_intr;
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+ /* if OK, compute maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+ ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+ (args.hobRegister[IDE_SECTOR_OFFSET_HOB]);
+ u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+ ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+ (args.tfRegister[IDE_SECTOR_OFFSET]);
+ addr_set = ((__u64)high << 24) | low;
+ }
+ return addr_set;
+}
+
+/*
+ * Tests whether the drive supports the Host Protected Area feature.
+ * Returns 1 if supported, 0 otherwise.
+ */
+static inline int idedisk_supports_host_protected_area(ide_drive_t *drive)
+{
+ int flag = (drive->id->cfs_enable_1 & 0x0400) ? 1 : 0;
+ printk("%s: host protected area => %d\n", drive->name, flag);
+ return flag;
+}
+
+#endif /* CONFIG_IDEDISK_STROKE */
+
+/*
+ * Compute drive->capacity, the full capacity of the drive
+ * Called with drive->id != NULL.
+ *
+ * To compute capacity, this uses either of
+ *
+ * 1. CHS value set by user (whatever user sets will be trusted)
+ * 2. LBA value from target drive (require new ATA feature)
+ * 3. LBA value from system BIOS (new one is OK, old one may break)
+ * 4. CHS value from system BIOS (traditional style)
+ *
+ * in the above order (i.e., if a value of higher priority is available,
+ * the rest will be ignored).
+ */
+static void init_idedisk_capacity (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+ unsigned long capacity = drive->cyl * drive->head * drive->sect;
+ unsigned long set_max = idedisk_read_native_max_address(drive);
+ unsigned long long capacity_2 = capacity;
+ unsigned long long set_max_ext;
+
+ drive->capacity48 = 0;
+ drive->select.b.lba = 0;
+
+ if (id->cfs_enable_2 & 0x0400) {
+ capacity_2 = id->lba_capacity_2;
+ drive->head = drive->bios_head = 255;
+ drive->sect = drive->bios_sect = 63;
+ drive->cyl = (unsigned int) capacity_2 / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ set_max_ext = idedisk_read_native_max_address_ext(drive);
+ if (set_max_ext > capacity_2) {
+#ifdef CONFIG_IDEDISK_STROKE
+ set_max_ext = idedisk_read_native_max_address_ext(drive);
+ set_max_ext = idedisk_set_max_address_ext(drive, set_max_ext);
+ if (set_max_ext) {
+ drive->capacity48 = capacity_2 = set_max_ext;
+ drive->cyl = (unsigned int) set_max_ext / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ drive->id->lba_capacity_2 = capacity_2;
+ }
+#else /* !CONFIG_IDEDISK_STROKE */
+ printk("%s: setmax_ext LBA %llu, native %llu\n",
+ drive->name, set_max_ext, capacity_2);
+#endif /* CONFIG_IDEDISK_STROKE */
+ }
+ drive->bios_cyl = drive->cyl;
+ drive->capacity48 = capacity_2;
+ drive->capacity = (unsigned long) capacity_2;
+ return;
+ /* Determine capacity, and use LBA if the drive properly supports it */
+ } else if ((id->capability & 2) && lba_capacity_is_ok(id)) {
+ capacity = id->lba_capacity;
+ drive->cyl = capacity / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ }
+
+ if (set_max > capacity) {
+#ifdef CONFIG_IDEDISK_STROKE
+ set_max = idedisk_read_native_max_address(drive);
+ set_max = idedisk_set_max_address(drive, set_max);
+ if (set_max) {
+ drive->capacity = capacity = set_max;
+ drive->cyl = set_max / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ drive->id->lba_capacity = capacity;
+ }
+#else /* !CONFIG_IDEDISK_STROKE */
+ printk("%s: setmax LBA %lu, native %lu\n",
+ drive->name, set_max, capacity);
+#endif /* CONFIG_IDEDISK_STROKE */
+ }
+
+ drive->capacity = capacity;
+
+ if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) {
+ drive->capacity48 = id->lba_capacity_2;
+ drive->head = 255;
+ drive->sect = 63;
+ drive->cyl = (unsigned long)(drive->capacity48) / (drive->head * drive->sect);
+ }
+}
+
+static unsigned long idedisk_capacity (ide_drive_t *drive)
+{
+ if (drive->id->cfs_enable_2 & 0x0400)
+ return (drive->capacity48 - drive->sect0);
+ return (drive->capacity - drive->sect0);
+}
+
+static ide_startstop_t idedisk_special (ide_drive_t *drive)
+{
+ special_t *s = &drive->special;
+
+ if (s->b.set_geometry) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_handler_t *handler = NULL;
+
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+
+ s->b.set_geometry = 0;
+ taskfile.sector_number = drive->sect;
+ taskfile.low_cylinder = drive->cyl;
+ taskfile.high_cylinder = drive->cyl>>8;
+ taskfile.device_head = ((drive->head-1)|drive->select.all)&0xBF;
+ if (!IS_PDC4030_DRIVE) {
+ taskfile.sector_count = drive->sect;
+ taskfile.command = WIN_SPECIFY;
+ handler = ide_handler_parser(&taskfile, &hobfile);
+ }
+ do_taskfile(drive, &taskfile, &hobfile, handler);
+ } else if (s->b.recalibrate) {
+ s->b.recalibrate = 0;
+ if (!IS_PDC4030_DRIVE) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.sector_count = drive->sect;
+ taskfile.command = WIN_RESTORE;
+ do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile));
+ }
+ } else if (s->b.set_multmode) {
+ s->b.set_multmode = 0;
+ if (drive->id && drive->mult_req > drive->id->max_multsect)
+ drive->mult_req = drive->id->max_multsect;
+ if (!IS_PDC4030_DRIVE) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.sector_count = drive->mult_req;
+ taskfile.command = WIN_SETMULT;
+ do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile));
+ }
+ } else if (s->all) {
+ int special = s->all;
+ s->all = 0;
+ printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special);
+ return ide_stopped;
+ }
+ return IS_PDC4030_DRIVE ? ide_stopped : ide_started;
+}
+
+static void idedisk_pre_reset (ide_drive_t *drive)
+{
+ int legacy = (drive->id->cfs_enable_2 & 0x0400) ? 0 : 1;
+
+ drive->special.all = 0;
+ drive->special.b.set_geometry = legacy;
+ drive->special.b.recalibrate = legacy;
+ if (OK_TO_RESET_CONTROLLER)
+ drive->mult_count = 0;
+ if (!drive->keep_settings && !drive->using_dma)
+ drive->mult_req = 0;
+ if (drive->mult_req != drive->mult_count)
+ drive->special.b.set_multmode = 1;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static int smart_enable(ide_drive_t *drive)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.feature = SMART_ENABLE;
+ taskfile.low_cylinder = SMART_LCYL_PASS;
+ taskfile.high_cylinder = SMART_HCYL_PASS;
+ taskfile.command = WIN_SMART;
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+}
+
+static int get_smart_values(ide_drive_t *drive, byte *buf)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.feature = SMART_READ_VALUES;
+ taskfile.sector_count = 0x01;
+ taskfile.low_cylinder = SMART_LCYL_PASS;
+ taskfile.high_cylinder = SMART_HCYL_PASS;
+ taskfile.command = WIN_SMART;
+ (void) smart_enable(drive);
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, buf);
+}
+
+static int get_smart_thresholds(ide_drive_t *drive, byte *buf)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.feature = SMART_READ_THRESHOLDS;
+ taskfile.sector_count = 0x01;
+ taskfile.low_cylinder = SMART_LCYL_PASS;
+ taskfile.high_cylinder = SMART_HCYL_PASS;
+ taskfile.command = WIN_SMART;
+ (void) smart_enable(drive);
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, buf);
+}
+
+static int proc_idedisk_read_cache
+ (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ ide_drive_t *drive = (ide_drive_t *) data;
+ char *out = page;
+ int len;
+
+ if (drive->id)
+ len = sprintf(out,"%i\n", drive->id->buf_size / 2);
+ else
+ len = sprintf(out,"(none)\n");
+ PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+static int proc_idedisk_read_smart_thresholds
+ (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ ide_drive_t *drive = (ide_drive_t *)data;
+ int len = 0, i = 0;
+
+ if (!get_smart_thresholds(drive, page)) {
+ unsigned short *val = (unsigned short *) page;
+ char *out = ((char *)val) + (SECTOR_WORDS * 4);
+ page = out;
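+		/* dump one 512-byte sector: SECTOR_WORDS*2 = 256 16-bit words, eight per line */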
+ do {
+ out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n');
+ val += 1;
+ } while (i < (SECTOR_WORDS * 2));
+ len = out - page;
+ }
+ PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+static int proc_idedisk_read_smart_values
+ (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ ide_drive_t *drive = (ide_drive_t *)data;
+ int len = 0, i = 0;
+
+ if (!get_smart_values(drive, page)) {
+ unsigned short *val = (unsigned short *) page;
+ char *out = ((char *)val) + (SECTOR_WORDS * 4);
+ page = out;
+ do {
+ out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n');
+ val += 1;
+ } while (i < (SECTOR_WORDS * 2));
+ len = out - page;
+ }
+ PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+static ide_proc_entry_t idedisk_proc[] = {
+ { "cache", S_IFREG|S_IRUGO, proc_idedisk_read_cache, NULL },
+ { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL },
+ { "smart_values", S_IFREG|S_IRUSR, proc_idedisk_read_smart_values, NULL },
+ { "smart_thresholds", S_IFREG|S_IRUSR, proc_idedisk_read_smart_thresholds, NULL },
+ { NULL, 0, NULL, NULL }
+};
+
+#else
+
+#define idedisk_proc NULL
+
+#endif /* CONFIG_PROC_FS */
+
+static int set_multcount(ide_drive_t *drive, int arg)
+{
+#ifdef __TASKFILE__IO
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+
+ if (drive->special.b.set_multmode)
+ return -EBUSY;
+
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.sector_count = drive->mult_req;
+ taskfile.command = WIN_SETMULT;
+ drive->mult_req = arg;
+ drive->special.b.set_multmode = 1;
+ ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+#else /* !__TASKFILE__IO */
+ struct request rq;
+
+ if (drive->special.b.set_multmode)
+ return -EBUSY;
+ ide_init_drive_cmd (&rq);
+ rq.cmd = IDE_DRIVE_CMD;
+ drive->mult_req = arg;
+ drive->special.b.set_multmode = 1;
+ (void) ide_do_drive_cmd (drive, &rq, ide_wait);
+#endif /* __TASKFILE__IO */
+ return (drive->mult_count == arg) ? 0 : -EIO;
+}
+
+static int set_nowerr(ide_drive_t *drive, int arg)
+{
+ if (ide_spin_wait_hwgroup(drive))
+ return -EBUSY;
+ drive->nowerr = arg;
+ drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT;
+ spin_unlock_irq(&io_request_lock);
+ return 0;
+}
+
+static int write_cache (ide_drive_t *drive, int arg)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.feature = (arg) ? SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE;
+ taskfile.command = WIN_SETFEATURES;
+
+ if (!(drive->id->cfs_enable_2 & 0x3000))
+ return 1;
+
+ (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+ drive->wcache = arg;
+ return 0;
+}
+
+static int do_idedisk_standby (ide_drive_t *drive)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.command = WIN_STANDBYNOW1;
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+}
+
+static int do_idedisk_flushcache (ide_drive_t *drive)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ if (drive->id->cfs_enable_2 & 0x2400) {
+ taskfile.command = WIN_FLUSH_CACHE_EXT;
+ } else {
+ taskfile.command = WIN_FLUSH_CACHE;
+ }
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+}
+
+static int set_acoustic (ide_drive_t *drive, int arg)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+
+ taskfile.feature = (arg)?SETFEATURES_EN_AAM:SETFEATURES_DIS_AAM;
+ taskfile.sector_count = arg;
+
+ taskfile.command = WIN_SETFEATURES;
+ (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+ drive->acoustic = arg;
+ return 0;
+}
+
+static int probe_lba_addressing (ide_drive_t *drive, int arg)
+{
+ drive->addressing = 0;
+
+ if (!(drive->id->cfs_enable_2 & 0x0400))
+ return -EIO;
+
+ drive->addressing = arg;
+ return 0;
+}
+
+static int set_lba_addressing (ide_drive_t *drive, int arg)
+{
+ return (probe_lba_addressing(drive, arg));
+}
+
+static void idedisk_add_settings(ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+#if 0
+ int major = HWIF(drive)->major;
+ int minor = drive->select.b.unit << PARTN_BITS;
+#endif
+
+ ide_add_setting(drive, "bios_cyl", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->bios_cyl, NULL);
+ ide_add_setting(drive, "bios_head", SETTING_RW, -1, -1, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL);
+ ide_add_setting(drive, "bios_sect", SETTING_RW, -1, -1, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL);
+ ide_add_setting(drive, "address", SETTING_RW, HDIO_GET_ADDRESS, HDIO_SET_ADDRESS, TYPE_INTA, 0, 2, 1, 1, &drive->addressing, set_lba_addressing);
+ ide_add_setting(drive, "bswap", SETTING_READ, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->bswap, NULL);
+ ide_add_setting(drive, "multcount", id ? SETTING_RW : SETTING_READ, HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, TYPE_BYTE, 0, id ? id->max_multsect : 0, 1, 1, &drive->mult_count, set_multcount);
+ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr);
+#if 0
+ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 1, &read_ahead[major], NULL);
+ ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, 4096, PAGE_SIZE, 1024, &max_readahead[major][minor], NULL);
+ ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 1, &max_sectors[major][minor], NULL);
+#endif
+ ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL);
+ ide_add_setting(drive, "wcache", SETTING_RW, HDIO_GET_WCACHE, HDIO_SET_WCACHE, TYPE_BYTE, 0, 1, 1, 1, &drive->wcache, write_cache);
+ ide_add_setting(drive, "acoustic", SETTING_RW, HDIO_GET_ACOUSTIC, HDIO_SET_ACOUSTIC, TYPE_BYTE, 0, 254, 1, 1, &drive->acoustic, set_acoustic);
+ ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL);
+ ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL);
+}
+
+static void idedisk_setup (ide_drive_t *drive)
+{
+ int i;
+
+ struct hd_driveid *id = drive->id;
+ unsigned long capacity;
+
+ printk (KERN_ALERT
+ "ide-disk.c::idedisk_setup: chs %d %d %d\n",
+ drive->cyl, drive->head, drive->sect);
+
+ idedisk_add_settings(drive);
+
+ if (id == NULL)
+ return;
+
+ /*
+	 * CompactFlash cards and their brethren look just like hard drives
+ * to us, but they are removable and don't have a doorlock mechanism.
+ */
+ if (drive->removable && !drive_is_flashcard(drive)) {
+ /*
+		 * Removable disks (e.g. SYQUEST); ignore 'WD' drives
+ */
+ if (id->model[0] != 'W' || id->model[1] != 'D') {
+ drive->doorlocking = 1;
+ }
+ }
+ for (i = 0; i < MAX_DRIVES; ++i) {
+ ide_hwif_t *hwif = HWIF(drive);
+
+ if (drive != &hwif->drives[i]) continue;
+#ifdef DEVFS_MUST_DIE
+ hwif->gd->de_arr[i] = drive->de;
+#endif
+ if (drive->removable)
+ hwif->gd->flags[i] |= GENHD_FL_REMOVABLE;
+ break;
+ }
+
+ /* Extract geometry if we did not already have one for the drive */
+ if (!drive->cyl || !drive->head || !drive->sect) {
+ drive->cyl = drive->bios_cyl = id->cyls;
+ drive->head = drive->bios_head = id->heads;
+ drive->sect = drive->bios_sect = id->sectors;
+ }
+
+ /* Handle logical geometry translation by the drive */
+ if ((id->field_valid & 1) && id->cur_cyls &&
+ id->cur_heads && (id->cur_heads <= 16) && id->cur_sectors) {
+ drive->cyl = id->cur_cyls;
+ drive->head = id->cur_heads;
+ drive->sect = id->cur_sectors;
+ }
+
+ /* Use physical geometry if what we have still makes no sense */
+ if (drive->head > 16 && id->heads && id->heads <= 16) {
+ drive->cyl = id->cyls;
+ drive->head = id->heads;
+ drive->sect = id->sectors;
+ }
+
+ /* calculate drive capacity, and select LBA if possible */
+ init_idedisk_capacity (drive);
+
+ /*
+ * if possible, give fdisk access to more of the drive,
+ * by correcting bios_cyls:
+ */
+ capacity = idedisk_capacity (drive);
+ if ((capacity >= (drive->bios_cyl * drive->bios_sect * drive->bios_head)) &&
+ (!drive->forced_geom) && drive->bios_sect && drive->bios_head)
+ drive->bios_cyl = (capacity / drive->bios_sect) / drive->bios_head;
+ printk (KERN_INFO "[XEN] %s: %ld sectors", drive->name, capacity);
+
+ /* Give size in megabytes (MB), not mebibytes (MiB). */
+ /* We compute the exact rounded value, avoiding overflow. */
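+	/* capacity*512/10^6 == capacity*624/1218750 == (capacity - capacity/625)/1950
+	   exactly; the +974 (~1950/2) rounds to the nearest MB. */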
+ printk (" (%ld MB)", (capacity - capacity/625 + 974)/1950);
+
+ /* Only print cache size when it was specified */
+ if (id->buf_size)
+ printk (" w/%dKiB Cache", id->buf_size/2);
+
+ printk(", CHS=%d/%d/%d",
+ drive->bios_cyl, drive->bios_head, drive->bios_sect);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (drive->using_dma)
+ (void) HWIF(drive)->dmaproc(ide_dma_verbose, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ printk("\n");
+
+ drive->mult_count = 0;
+ if (id->max_multsect) {
+#ifdef CONFIG_IDEDISK_MULTI_MODE
+ id->multsect = ((id->max_multsect/2) > 1) ? id->max_multsect : 0;
+ id->multsect_valid = id->multsect ? 1 : 0;
+ drive->mult_req = id->multsect_valid ? id->max_multsect : INITIAL_MULT_COUNT;
+ drive->special.b.set_multmode = drive->mult_req ? 1 : 0;
+#else /* original, pre IDE-NFG, per request of AC */
+ drive->mult_req = INITIAL_MULT_COUNT;
+ if (drive->mult_req > id->max_multsect)
+ drive->mult_req = id->max_multsect;
+ if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect))
+ drive->special.b.set_multmode = 1;
+#endif /* CONFIG_IDEDISK_MULTI_MODE */
+ }
+ drive->no_io_32bit = id->dword_io ? 1 : 0;
+ if (drive->id->cfs_enable_2 & 0x3000)
+ write_cache(drive, (id->cfs_enable_2 & 0x3000));
+ (void) probe_lba_addressing(drive, 1);
+}
+
+static int idedisk_cleanup (ide_drive_t *drive)
+{
+ if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache)
+ if (do_idedisk_flushcache(drive))
+ printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n",
+ drive->name);
+ return ide_unregister_subdriver(drive);
+}
+
+int idedisk_reinit(ide_drive_t *drive);
+
+/*
+ * IDE subdriver functions, registered with ide.c
+ */
+static ide_driver_t idedisk_driver = {
+ name: "ide-disk",
+ version: IDEDISK_VERSION,
+ media: ide_disk,
+ busy: 0,
+ supports_dma: 1,
+ supports_dsc_overlap: 0,
+ cleanup: idedisk_cleanup,
+ standby: do_idedisk_standby,
+ flushcache: do_idedisk_flushcache,
+ do_request: do_rw_disk,
+ end_request: NULL,
+ ioctl: NULL,
+ open: idedisk_open,
+ release: idedisk_release,
+ media_change: idedisk_media_change,
+ revalidate: idedisk_revalidate,
+ pre_reset: idedisk_pre_reset,
+ capacity: idedisk_capacity,
+ special: idedisk_special,
+ /*proc: idedisk_proc,*/
+ reinit: idedisk_reinit,
+ ata_prebuilder: NULL,
+ atapi_prebuilder: NULL,
+};
+
+int idedisk_init (void);
+static ide_module_t idedisk_module = {
+ IDE_DRIVER_MODULE,
+ idedisk_init,
+ &idedisk_driver,
+ NULL
+};
+
+MODULE_DESCRIPTION("ATA DISK Driver");
+
+int idedisk_reinit (ide_drive_t *drive)
+{
+ int failed = 0;
+
+ MOD_INC_USE_COUNT;
+
+ if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) {
+ printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name);
+ return 1;
+ }
+ DRIVER(drive)->busy++;
+ idedisk_setup(drive);
+ if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
+ printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head);
+ (void) idedisk_cleanup(drive);
+ DRIVER(drive)->busy--;
+ return 1;
+ }
+ DRIVER(drive)->busy--;
+ failed--;
+
+ ide_register_module(&idedisk_module);
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+static void __exit idedisk_exit (void)
+{
+ ide_drive_t *drive;
+ int failed = 0;
+
+ while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, &idedisk_driver, failed)) != NULL) {
+ if (idedisk_cleanup (drive)) {
+ printk (KERN_ERR "%s: cleanup_module() called while still busy\n", drive->name);
+ failed++;
+ }
+		/* We must remove the proc entries defined in this module.
+		   Otherwise we oops when accessing them. */
+#ifdef CONFIG_PROC_FS
+ if (drive->proc)
+ ide_remove_proc_entries(drive->proc, idedisk_proc);
+#endif
+ }
+ ide_unregister_module(&idedisk_module);
+}
+
+int idedisk_init (void)
+{
+ ide_drive_t *drive;
+ int failed = 0;
+
+ MOD_INC_USE_COUNT;
+ while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, NULL, failed++)) != NULL) {
+ if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) {
+ printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name);
+ continue;
+ }
+ DRIVER(drive)->busy++;
+ idedisk_setup(drive);
+ if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
+ printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head);
+ (void) idedisk_cleanup(drive);
+ DRIVER(drive)->busy--;
+ continue;
+ }
+ DRIVER(drive)->busy--;
+ failed--;
+ }
+ ide_register_module(&idedisk_module);
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+module_init(idedisk_init);
+module_exit(idedisk_exit);
+MODULE_LICENSE("GPL");
diff --git a/xen/drivers/ide/ide-dma.c b/xen/drivers/ide/ide-dma.c
new file mode 100644
index 0000000000..6ce5fd4b1f
--- /dev/null
+++ b/xen/drivers/ide/ide-dma.c
@@ -0,0 +1,913 @@
+/*
+ * linux/drivers/ide/ide-dma.c Version 4.10 June 9, 2000
+ *
+ * Copyright (c) 1999-2000 Andre Hedrick <andre@linux-ide.org>
+ * May be copied or modified under the terms of the GNU General Public License
+ */
+
+/*
+ * Special thanks to Mark for his six years of work.
+ *
+ * Copyright (c) 1995-1998 Mark Lord
+ * May be copied or modified under the terms of the GNU General Public License
+ */
+
+/*
+ * This module provides support for the bus-master IDE DMA functions
+ * of various PCI chipsets, including the Intel PIIX (i82371FB for
+ * the 430 FX chipset), the PIIX3 (i82371SB for the 430 HX/VX and
+ * 440 chipsets), and the PIIX4 (i82371AB for the 430 TX chipset)
+ * ("PIIX" stands for "PCI ISA IDE Xcellerator").
+ *
+ * Pretty much the same code works for other IDE PCI bus-mastering chipsets.
+ *
+ * DMA is supported for all IDE devices (disk drives, cdroms, tapes, floppies).
+ *
+ * By default, DMA support is prepared for use, but is currently enabled only
+ * for drives which already have DMA enabled (UltraDMA or mode 2 multi/single),
+ * or which are recognized as "good" (see table below). Drives with only mode0
+ * or mode1 (multi/single) DMA should also work with this chipset/driver
+ * (eg. MC2112A) but are not enabled by default.
+ *
+ * Use "hdparm -i" to view modes supported by a given drive.
+ *
+ * The hdparm-3.5 (or later) utility can be used for manually enabling/disabling
+ * DMA support, but must be (re-)compiled against this kernel version or later.
+ *
+ * To enable DMA, use "hdparm -d1 /dev/hd?" on a per-drive basis after booting.
+ * If problems arise, ide.c will disable DMA operation after a few retries.
+ * This error recovery mechanism works and has been extremely well exercised.
+ *
+ * IDE drives, depending on their vintage, may support several different modes
+ * of DMA operation. The boot-time modes are indicated with a "*" in
+ * the "hdparm -i" listing, and can be changed with *knowledgeable* use of
+ * the "hdparm -X" feature. There is seldom a need to do this, as drives
+ * normally power-up with their "best" PIO/DMA modes enabled.
+ *
+ * Testing has been done with a rather extensive number of drives,
+ * with Quantum & Western Digital models generally outperforming the pack,
+ * and Fujitsu & Conner (and some Seagate which are really Conner) drives
+ * showing more lackluster throughput.
+ *
+ * Keep an eye on /var/adm/messages for "DMA disabled" messages.
+ *
+ * Some people have reported trouble with Intel Zappa motherboards.
+ * This can be fixed by upgrading the AMI BIOS to version 1.00.04.BS0,
+ * available from ftp://ftp.intel.com/pub/bios/10004bs0.exe
+ * (thanks to Glen Morrell <glen@spin.Stanford.edu> for researching this).
+ *
+ * Thanks to "Christopher J. Reimer" <reimer@doe.carleton.ca> for
+ * fixing the problem with the BIOS on some Acer motherboards.
+ *
+ * Thanks to "Benoit Poulot-Cazajous" <poulot@chorus.fr> for testing
+ * "TX" chipset compatibility and for providing patches for the "TX" chipset.
+ *
+ * Thanks to Christian Brunner <chb@muc.de> for taking a good first crack
+ * at generic DMA -- his patches were referred to when preparing this code.
+ *
+ * Most importantly, thanks to Robert Bringman <rob@mars.trion.com>
+ * for supplying a Promise UDMA board & WD UDMA drive for this work!
+ *
+ * And, yes, Intel Zappa boards really *do* use both PIIX IDE ports.
+ *
+ * check_drive_lists(ide_drive_t *drive, int good_bad)
+ *
+ * ATA-66/100 and recovery functions, I forgot the rest......
+ * SELECT_READ_WRITE(hwif,drive,func) for active tuning based on IO direction.
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/pci.h>
+#include <xeno/init.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+/*
+ * Long lost data from 2.0.34 that is now in 2.0.39
+ *
+ * This was used in ./drivers/block/triton.c to do DMA Base address setup
+ * when PnP failed. Oh the things we forget. I believe this was part
+ * of SFF-8038i that has been withdrawn from public access... :-((
+ */
+#define DEFAULT_BMIBA 0xe800 /* in case BIOS did not init it */
+#define DEFAULT_BMCRBA 0xcc00 /* VIA's default value */
+#define DEFAULT_BMALIBA 0xd400 /* ALI's default value */
+
+extern char *ide_dmafunc_verbose(ide_dma_action_t dmafunc);
+
+#ifdef CONFIG_IDEDMA_NEW_DRIVE_LISTINGS
+
+struct drive_list_entry {
+ char * id_model;
+ char * id_firmware;
+};
+
+struct drive_list_entry drive_whitelist [] = {
+
+ { "Micropolis 2112A" , "ALL" },
+ { "CONNER CTMA 4000" , "ALL" },
+ { "CONNER CTT8000-A" , "ALL" },
+ { "ST34342A" , "ALL" },
+ { 0 , 0 }
+};
+
+struct drive_list_entry drive_blacklist [] = {
+
+ { "WDC AC11000H" , "ALL" },
+ { "WDC AC22100H" , "ALL" },
+ { "WDC AC31000H" , "ALL" },
+ { "WDC AC32500H" , "ALL" },
+ { "WDC AC33100H" , "ALL" },
+ { "WDC AC31600H" , "ALL" },
+ { "WDC AC32100H" , "24.09P07" },
+ { "WDC AC23200L" , "21.10N21" },
+ { "Compaq CRD-8241B" , "ALL" },
+ { "CRD-8400B" , "ALL" },
+ { "CRD-8480B", "ALL" },
+ { "CRD-8480C", "ALL" },
+ { "CRD-8482B", "ALL" },
+ { "CRD-84" , "ALL" },
+ { "SanDisk SDP3B" , "ALL" },
+ { "SanDisk SDP3B-64" , "ALL" },
+ { "SANYO CD-ROM CRD" , "ALL" },
+ { "HITACHI CDR-8" , "ALL" },
+ { "HITACHI CDR-8335" , "ALL" },
+ { "HITACHI CDR-8435" , "ALL" },
+ { "Toshiba CD-ROM XM-6202B" , "ALL" },
+ { "CD-532E-A" , "ALL" },
+ { "E-IDE CD-ROM CR-840", "ALL" },
+ { "CD-ROM Drive/F5A", "ALL" },
+ { "RICOH CD-R/RW MP7083A", "ALL" },
+ { "WPI CDD-820", "ALL" },
+ { "SAMSUNG CD-ROM SC-148C", "ALL" },
+ { "SAMSUNG CD-ROM SC-148F", "ALL" },
+ { "SAMSUNG CD-ROM SC", "ALL" },
+ { "SanDisk SDP3B-64" , "ALL" },
+ { "SAMSUNG CD-ROM SN-124", "ALL" },
+ { "PLEXTOR CD-R PX-W8432T", "ALL" },
+ { "ATAPI CD-ROM DRIVE 40X MAXIMUM", "ALL" },
+ { "_NEC DV5800A", "ALL" },
+ { 0 , 0 }
+
+};
+
+int in_drive_list(struct hd_driveid *id, struct drive_list_entry * drive_table)
+{
+ for ( ; drive_table->id_model ; drive_table++)
+ if ((!strcmp(drive_table->id_model, id->model)) &&
+		    ((strstr(drive_table->id_firmware, id->fw_rev)) ||
+ (!strcmp(drive_table->id_firmware, "ALL"))))
+ return 1;
+ return 0;
+}
+
+#else /* !CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+
+/*
+ * good_dma_drives() lists the model names (from "hdparm -i")
+ * of drives which do not support mode2 DMA but which are
+ * known to work fine with this interface under Linux.
+ */
+const char *good_dma_drives[] = {"Micropolis 2112A",
+ "CONNER CTMA 4000",
+ "CONNER CTT8000-A",
+ "ST34342A", /* for Sun Ultra */
+ NULL};
+
+/*
+ * bad_dma_drives() lists the model names (from "hdparm -i")
+ * of drives which supposedly support (U)DMA but which are
+ * known to corrupt data with this interface under Linux.
+ *
+ * This is an empirical list, generated from bug reports. That means that
+ * while it reflects actual problem distributions, it doesn't tell us
+ * whether the drive, the controller, the cabling, the software, or some
+ * combination thereof is at fault. If you don't happen to agree with the
+ * kernel's opinion of your drive - use hdparm to turn DMA on.
+ */
+const char *bad_dma_drives[] = {"WDC AC11000H",
+ "WDC AC22100H",
+ "WDC AC32100H",
+ "WDC AC32500H",
+ "WDC AC33100H",
+ "WDC AC31600H",
+ NULL};
+
+#endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+
+/*
+ * Our Physical Region Descriptor (PRD) table should be large enough
+ * to handle the biggest I/O request we are likely to see. Since requests
+ * can have no more than 256 sectors, and since the typical blocksize is
+ * two or more sectors, we could get by with a limit of 128 entries here for
+ * the usual worst case. Most requests seem to include some contiguous blocks,
+ * further reducing the number of table entries required.
+ *
+ * The driver reverts to PIO mode for individual requests that exceed
+ * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling
+ * 100% of all crazy scenarios here is not necessary.
+ *
+ * As it turns out though, we must allocate a full 4KB page for this,
+ * so the two PRD tables (ide0 & ide1) will each get half of that,
+ * allowing each to have about 256 entries (8 bytes each) from this.
+ */
+#define PRD_BYTES 8
+#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES))
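+
+/*
+ * With 4kB pages this gives PRD_ENTRIES = 4096 / (2 * 8) = 256 per interface.
+ * As used below, each 8-byte entry is two little-endian 32-bit words: the
+ * physical base address of a block, then the byte count in the low 16 bits
+ * (0x0000 meaning 64kB), with 0x80000000 marking the final entry.
+ */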
+
+/*
+ * dma_intr() is the handler for disk read/write DMA interrupts
+ */
+ide_startstop_t ide_dma_intr (ide_drive_t *drive)
+{
+ int i;
+ byte stat, dma_stat;
+
+ dma_stat = HWIF(drive)->dmaproc(ide_dma_end, drive);
+ stat = GET_STAT(); /* get drive status */
+ if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) {
+ if (!dma_stat) {
+			struct request *rq = HWGROUP(drive)->rq;
+ for (i = rq->nr_sectors; i > 0;) {
+ i -= rq->current_nr_sectors;
+ ide_end_request(1, HWGROUP(drive));
+ }
+ return ide_stopped;
+ }
+ printk("%s: dma_intr: bad DMA status (dma_stat=%x)\n",
+ drive->name, dma_stat);
+ }
+ return ide_error(drive, "dma_intr", stat);
+}
+
+static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq)
+{
+ struct buffer_head *bh;
+ struct scatterlist *sg = hwif->sg_table;
+ unsigned long lastdataend = ~0UL;
+ int nents = 0;
+
+ if (hwif->sg_dma_active)
+ BUG();
+
+ if (rq->cmd == READ)
+ hwif->sg_dma_direction = PCI_DMA_FROMDEVICE;
+ else
+ hwif->sg_dma_direction = PCI_DMA_TODEVICE;
+
+ bh = rq->bh;
+ do {
+ struct scatterlist *sge;
+
+ /*
+ * continue segment from before?
+ */
+ if (bh_phys(bh) == lastdataend) {
+ sg[nents - 1].length += bh->b_size;
+ lastdataend += bh->b_size;
+ continue;
+ }
+
+ /*
+ * start new segment
+ */
+ if (nents >= PRD_ENTRIES)
+ return 0;
+
+ sge = &sg[nents];
+ memset(sge, 0, sizeof(*sge));
+
+ if (bh->b_page) {
+ sge->page = bh->b_page;
+ sge->offset = bh_offset(bh);
+ } else {
+
+
+#if 0
+ /* below is wrong for xen since b_data is actually
+ a 'physical / virtual' thingy. Ask KAF. */
+ if (((unsigned long) bh->b_data) < PAGE_SIZE)
+ BUG();
+#endif
+
+ sge->address = bh->b_data;
+ }
+
+ sge->length = bh->b_size;
+ lastdataend = bh_phys(bh) + bh->b_size;
+ nents++;
+ } while ((bh = bh->b_reqnext) != NULL);
+
+ return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction);
+}
+
+static int ide_raw_build_sglist (ide_hwif_t *hwif, struct request *rq)
+{
+ struct scatterlist *sg = hwif->sg_table;
+ int nents = 0;
+ ide_task_t *args = rq->special;
+ unsigned char *virt_addr = rq->buffer;
+ int sector_count = rq->nr_sectors;
+
+// if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA) ||
+// (args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA_EXT))
+ if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
+ hwif->sg_dma_direction = PCI_DMA_TODEVICE;
+ else
+ hwif->sg_dma_direction = PCI_DMA_FROMDEVICE;
+
+ if (sector_count > 128) {
+ memset(&sg[nents], 0, sizeof(*sg));
+ sg[nents].address = virt_addr;
+ sg[nents].length = 128 * SECTOR_SIZE;
+ nents++;
+ virt_addr = virt_addr + (128 * SECTOR_SIZE);
+ sector_count -= 128;
+ }
+ memset(&sg[nents], 0, sizeof(*sg));
+ sg[nents].address = virt_addr;
+ sg[nents].length = sector_count * SECTOR_SIZE;
+ nents++;
+
+ return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction);
+}
+
+/*
+ * ide_build_dmatable() prepares a dma request.
+ * Returns 0 if all went okay, returns 1 otherwise.
+ * May also be invoked from trm290.c
+ */
+int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func)
+{
+ unsigned int *table = HWIF(drive)->dmatable_cpu;
+#ifdef CONFIG_BLK_DEV_TRM290
+ unsigned int is_trm290_chipset = (HWIF(drive)->chipset == ide_trm290);
+#else
+ const int is_trm290_chipset = 0;
+#endif
+ unsigned int count = 0;
+ int i;
+ struct scatterlist *sg;
+
+ if (HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE)
+ HWIF(drive)->sg_nents = i = ide_raw_build_sglist(HWIF(drive), HWGROUP(drive)->rq);
+ else
+ HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq);
+
+ if (!i)
+ return 0;
+
+ sg = HWIF(drive)->sg_table;
+ while (i) {
+ u32 cur_addr;
+ u32 cur_len;
+
+ cur_addr = sg_dma_address(sg);
+ cur_len = sg_dma_len(sg);
+
+ /*
+ * Fill in the dma table, without crossing any 64kB boundaries.
+ * Most hardware requires 16-bit alignment of all blocks,
+ * but the trm290 requires 32-bit alignment.
+ */
+
+ while (cur_len) {
+ u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff);
+
+ if (count++ >= PRD_ENTRIES)
+ BUG();
+
+ if (bcount > cur_len)
+ bcount = cur_len;
+ *table++ = cpu_to_le32(cur_addr);
+ xcount = bcount & 0xffff;
+ if (is_trm290_chipset)
+ xcount = ((xcount >> 2) - 1) << 16;
+ if (xcount == 0x0000) {
+ /*
+ * Most chipsets correctly interpret a length
+ * of 0x0000 as 64KB, but at least one
+ * (e.g. CS5530) misinterprets it as zero (!).
+ * So here we break the 64KB entry into two
+ * 32KB entries instead.
+ */
+ if (count++ >= PRD_ENTRIES)
+ goto use_pio_instead;
+
+ *table++ = cpu_to_le32(0x8000);
+ *table++ = cpu_to_le32(cur_addr + 0x8000);
+ xcount = 0x8000;
+ }
+ *table++ = cpu_to_le32(xcount);
+ cur_addr += bcount;
+ cur_len -= bcount;
+ }
+
+ sg++;
+ i--;
+ }
+
+ if (count) {
+ if (!is_trm290_chipset)
+ *--table |= cpu_to_le32(0x80000000);
+ return count;
+ }
+ printk("%s: empty DMA table?\n", drive->name);
+use_pio_instead:
+ pci_unmap_sg(HWIF(drive)->pci_dev,
+ HWIF(drive)->sg_table,
+ HWIF(drive)->sg_nents,
+ HWIF(drive)->sg_dma_direction);
+ HWIF(drive)->sg_dma_active = 0;
+ return 0; /* revert to PIO for this request */
+}
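+
+/*
+ * For reference, the table filled above follows the standard SFF-8038i
+ * physical-region-descriptor format (a sketch; each PRD is a pair of
+ * little-endian 32-bit words, and bit 31 of the second word marks the
+ * end of the table):
+ */
+#if 0
+struct prd_entry {
+	u32 addr;	/* physical base; 16-bit aligned, 32-bit on trm290 */
+	u32 flags_len;	/* bits 0-15: byte count (0 would mean 64kB);
+			   bit 31: end-of-table, set on the last entry */
+};
+#endif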
+
+/* Teardown mappings after DMA has completed. */
+void ide_destroy_dmatable (ide_drive_t *drive)
+{
+ struct pci_dev *dev = HWIF(drive)->pci_dev;
+ struct scatterlist *sg = HWIF(drive)->sg_table;
+ int nents = HWIF(drive)->sg_nents;
+
+ pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction);
+ HWIF(drive)->sg_dma_active = 0;
+}
+
+/*
+ * For both blacklisted and whitelisted drives.
+ * This is set up to be called as an extern, for future use by
+ * other special driver code.
+ */
+int check_drive_lists (ide_drive_t *drive, int good_bad)
+{
+ struct hd_driveid *id = drive->id;
+
+#ifdef CONFIG_IDEDMA_NEW_DRIVE_LISTINGS
+ if (good_bad) {
+ return in_drive_list(id, drive_whitelist);
+ } else {
+ int blacklist = in_drive_list(id, drive_blacklist);
+ if (blacklist)
+ printk("%s: Disabling (U)DMA for %s\n", drive->name, id->model);
+ return(blacklist);
+ }
+#else /* !CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+ const char **list;
+
+ if (good_bad) {
+ /* Consult the list of known "good" drives */
+ list = good_dma_drives;
+ while (*list) {
+ if (!strcmp(*list++,id->model))
+ return 1;
+ }
+ } else {
+ /* Consult the list of known "bad" drives */
+ list = bad_dma_drives;
+ while (*list) {
+ if (!strcmp(*list++,id->model)) {
+ printk("%s: Disabling (U)DMA for %s\n",
+ drive->name, id->model);
+ return 1;
+ }
+ }
+ }
+#endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+ return 0;
+}
+
+int report_drive_dmaing (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+
+ if ((id->field_valid & 4) && (eighty_ninty_three(drive)) &&
+ (id->dma_ultra & (id->dma_ultra >> 14) & 3)) {
+ if ((id->dma_ultra >> 15) & 1) {
+ printk(", UDMA(mode 7)"); /* UDMA BIOS-enabled! */
+ } else {
+ printk(", UDMA(133)"); /* UDMA BIOS-enabled! */
+ }
+ } else if ((id->field_valid & 4) && (eighty_ninty_three(drive)) &&
+ (id->dma_ultra & (id->dma_ultra >> 11) & 7)) {
+ if ((id->dma_ultra >> 13) & 1) {
+ printk(", UDMA(100)"); /* UDMA BIOS-enabled! */
+ } else if ((id->dma_ultra >> 12) & 1) {
+ printk(", UDMA(66)"); /* UDMA BIOS-enabled! */
+ } else {
+ printk(", UDMA(44)"); /* UDMA BIOS-enabled! */
+ }
+ } else if ((id->field_valid & 4) &&
+ (id->dma_ultra & (id->dma_ultra >> 8) & 7)) {
+ if ((id->dma_ultra >> 10) & 1) {
+ printk(", UDMA(33)"); /* UDMA BIOS-enabled! */
+ } else if ((id->dma_ultra >> 9) & 1) {
+ printk(", UDMA(25)"); /* UDMA BIOS-enabled! */
+ } else {
+ printk(", UDMA(16)"); /* UDMA BIOS-enabled! */
+ }
+ } else if (id->field_valid & 4) {
+ printk(", (U)DMA"); /* Can be BIOS-enabled! */
+ } else {
+ printk(", DMA");
+ }
+ return 1;
+}
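+
+/*
+ * The dma_ultra tests above rely on the IDENTIFY word 88 layout: the
+ * low byte reports which UDMA modes the drive supports, the high byte
+ * which one is currently enabled.  Hence "x & (x >> 8) & 7" is nonzero
+ * only when one of modes 0-2 is both supported and enabled, and the
+ * ">> 11" / ">> 14" variants match the enabled bits of the higher mode
+ * groups against the (always-set) low support bits.  For example,
+ * dma_ultra == 0x2020 means mode 5 supported and selected.
+ */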
+
+static int config_drive_for_dma (ide_drive_t *drive)
+{
+ int config_allows_dma = 1;
+ struct hd_driveid *id = drive->id;
+ ide_hwif_t *hwif = HWIF(drive);
+
+#ifdef CONFIG_IDEDMA_ONLYDISK
+ if (drive->media != ide_disk)
+ config_allows_dma = 0;
+#endif
+
+ if (id && (id->capability & 1) && hwif->autodma && config_allows_dma) {
+ /* Consult the list of known "bad" drives */
+ if (ide_dmaproc(ide_dma_bad_drive, drive))
+ return hwif->dmaproc(ide_dma_off, drive);
+
+ /* Enable DMA on any drive that has UltraDMA (mode 6/7/?) enabled */
+ if ((id->field_valid & 4) && (eighty_ninty_three(drive)))
+ if ((id->dma_ultra & (id->dma_ultra >> 14) & 2))
+ return hwif->dmaproc(ide_dma_on, drive);
+ /* Enable DMA on any drive that has UltraDMA (mode 3/4/5) enabled */
+ if ((id->field_valid & 4) && (eighty_ninty_three(drive)))
+ if ((id->dma_ultra & (id->dma_ultra >> 11) & 7))
+ return hwif->dmaproc(ide_dma_on, drive);
+ /* Enable DMA on any drive that has UltraDMA (mode 0/1/2) enabled */
+ if (id->field_valid & 4) /* UltraDMA */
+ if ((id->dma_ultra & (id->dma_ultra >> 8) & 7))
+ return hwif->dmaproc(ide_dma_on, drive);
+ /* Enable DMA on any drive that has mode2 DMA (multi or single) enabled */
+ if (id->field_valid & 2) /* regular DMA */
+ if ((id->dma_mword & 0x404) == 0x404 || (id->dma_1word & 0x404) == 0x404)
+ return hwif->dmaproc(ide_dma_on, drive);
+ /* Consult the list of known "good" drives */
+ if (ide_dmaproc(ide_dma_good_drive, drive))
+ return hwif->dmaproc(ide_dma_on, drive);
+ }
+ return hwif->dmaproc(ide_dma_off_quietly, drive);
+}
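+
+/*
+ * The "(id->dma_mword & 0x404) == 0x404" test above applies the same
+ * supported/selected split to multiword DMA (IDENTIFY word 63): bit 2
+ * means mode 2 is supported, bit 10 means mode 2 is the selected mode,
+ * and 0x404 requires both; likewise for single-word DMA in dma_1word.
+ */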
+
+#ifndef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+/*
+ * 1 dmaing, 2 error, 4 intr
+ */
+static int dma_timer_expiry (ide_drive_t *drive)
+{
+ byte dma_stat = inb(HWIF(drive)->dma_base+2);
+
+#ifdef DEBUG
+ printk("%s: dma_timer_expiry: dma status == 0x%02x\n", drive->name, dma_stat);
+#endif /* DEBUG */
+
+#if 0
+ HWGROUP(drive)->expiry = NULL; /* one free ride for now */
+#endif
+
+ if (dma_stat & 2) { /* ERROR */
+ byte stat = GET_STAT();
+ return ide_error(drive, "dma_timer_expiry", stat);
+ }
+ if (dma_stat & 1) /* DMAing */
+ return WAIT_CMD;
+ return 0;
+}
+#else /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+static ide_startstop_t ide_dma_timeout_recovery (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ ide_hwif_t *hwif = HWIF(drive);
+ int enable_dma = drive->using_dma;
+ unsigned long flags;
+ ide_startstop_t startstop;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ hwgroup->handler = NULL;
+ del_timer(&hwgroup->timer);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ drive->waiting_for_dma = 0;
+
+ startstop = ide_do_reset(drive);
+
+ if ((enable_dma) && !(drive->using_dma))
+ (void) hwif->dmaproc(ide_dma_on, drive);
+
+ return startstop;
+}
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+
+#if 0
+static inline void ide_toggle_bounce(ide_drive_t *drive, int on)
+{
+ dma64_addr_t addr = BLK_BOUNCE_HIGH;
+
+ if (HWIF(drive)->no_highio || HWIF(drive)->pci_dev == NULL)
+ return;
+
+ if (on && drive->media == ide_disk) {
+ if (!PCI_DMA_BUS_IS_PHYS)
+ addr = BLK_BOUNCE_ANY;
+ else
+ addr = HWIF(drive)->pci_dev->dma_mask;
+ }
+
+ blk_queue_bounce_limit(&drive->queue, addr);
+}
+#endif
+
+/*
+ * ide_dmaproc() initiates/aborts DMA read/write operations on a drive.
+ *
+ * The caller is assumed to have selected the drive and programmed the drive's
+ * sector address using CHS or LBA. All that remains is to prepare for DMA
+ * and then issue the actual read/write DMA/PIO command to the drive.
+ *
+ * For ATAPI devices, we just prepare for DMA and return. The caller should
+ * then issue the packet command to the drive and call us again with
+ * ide_dma_begin afterwards.
+ *
+ * Returns 0 if all went well.
+ * Returns 1 if DMA read/write could not be started, in which case
+ * the caller should revert to PIO for the current request.
+ * May also be invoked from trm290.c
+ */
+int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive)
+{
+// ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ ide_hwif_t *hwif = HWIF(drive);
+ unsigned long dma_base = hwif->dma_base;
+ byte unit = (drive->select.b.unit & 0x01);
+ unsigned int count, reading = 0/*, set_high = 1*/;
+ byte dma_stat;
+
+ switch (func) {
+ case ide_dma_off:
+ printk("%s: DMA disabled\n", drive->name);
+ case ide_dma_off_quietly:
+ /*set_high = 0;*/
+ outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2);
+ case ide_dma_on:
+ drive->using_dma = (func == ide_dma_on);
+ if (drive->using_dma)
+ outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2);
+ /*ide_toggle_bounce(drive, set_high);*/
+ return 0;
+ case ide_dma_check:
+ return config_drive_for_dma (drive);
+ case ide_dma_read:
+ reading = 1 << 3;
+ case ide_dma_write:
+ SELECT_READ_WRITE(hwif,drive,func);
+ if (!(count = ide_build_dmatable(drive, func)))
+ return 1; /* try PIO instead of DMA */
+ outl(hwif->dmatable_dma, dma_base + 4); /* PRD table */
+ outb(reading, dma_base); /* specify r/w */
+ outb(inb(dma_base+2)|6, dma_base+2); /* clear INTR & ERROR flags */
+ drive->waiting_for_dma = 1;
+ if (drive->media != ide_disk)
+ return 0;
+#ifdef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+ ide_set_handler(drive, &ide_dma_intr, 2*WAIT_CMD, NULL); /* issue cmd to drive */
+#else /* !CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+ ide_set_handler(drive, &ide_dma_intr, WAIT_CMD, dma_timer_expiry); /* issue cmd to drive */
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+ if ((HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE) &&
+ (drive->addressing == 1)) {
+ ide_task_t *args = HWGROUP(drive)->rq->special;
+ OUT_BYTE(args->tfRegister[IDE_COMMAND_OFFSET], IDE_COMMAND_REG);
+ } else if (drive->addressing) {
+ OUT_BYTE(reading ? WIN_READDMA_EXT : WIN_WRITEDMA_EXT, IDE_COMMAND_REG);
+ } else {
+ OUT_BYTE(reading ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG);
+ }
+ return HWIF(drive)->dmaproc(ide_dma_begin, drive);
+ case ide_dma_begin:
+ /* Note that this is done *after* the cmd has
+ * been issued to the drive, as per the BM-IDE spec.
+ * The Promise Ultra33 doesn't work correctly when
+ * we do this part before issuing the drive cmd.
+ */
+ outb(inb(dma_base)|1, dma_base); /* start DMA */
+ return 0;
+ case ide_dma_end: /* returns 1 on error, 0 otherwise */
+ drive->waiting_for_dma = 0;
+ outb(inb(dma_base)&~1, dma_base); /* stop DMA */
+ dma_stat = inb(dma_base+2); /* get DMA status */
+ outb(dma_stat|6, dma_base+2); /* clear the INTR & ERROR bits */
+ ide_destroy_dmatable(drive); /* purge DMA mappings */
+ return (dma_stat & 7) != 4 ? (0x10 | dma_stat) : 0; /* verify good DMA status */
+ case ide_dma_test_irq: /* returns 1 if dma irq issued, 0 otherwise */
+ dma_stat = inb(dma_base+2);
+#if 0 /* do not set unless you know what you are doing */
+ if (dma_stat & 4) {
+ byte stat = GET_STAT();
+			outb(dma_stat & 0xE4, dma_base+2); /* outb(value, port): ack IRQ, keep capability bits */
+ }
+#endif
+ return (dma_stat & 4) == 4; /* return 1 if INTR asserted */
+ case ide_dma_bad_drive:
+ case ide_dma_good_drive:
+ return check_drive_lists(drive, (func == ide_dma_good_drive));
+ case ide_dma_verbose:
+ return report_drive_dmaing(drive);
+ case ide_dma_timeout:
+ // FIXME: Many IDE chipsets do not permit command file register access
+ // FIXME: while the bus-master function is still active.
+ // FIXME: To prevent deadlock with those chipsets, we must be extremely
+ // FIXME: careful here (and in ide_intr() as well) to NOT access any
+ // FIXME: registers from the 0x1Fx/0x17x sets before terminating the
+ // FIXME: bus-master operation via the bus-master control reg.
+ // FIXME: Otherwise, chipset deadlock will occur, and some systems will
+ // FIXME: lock up completely!!
+#ifdef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+	/*
+	 * Have to issue an abort and requeue the request.
+	 * The DMA engine got turned off by a goofy ASIC, and
+	 * we have to clean up the mess; here is as good a
+	 * place as any.  Do it globally for all chipsets.
+	 */
+ outb(0x00, dma_base); /* stop DMA */
+ dma_stat = inb(dma_base+2); /* get DMA status */
+ outb(dma_stat|6, dma_base+2); /* clear the INTR & ERROR bits */
+		printk("%s: %s: Let's do it again! "
+			"stat = 0x%02x, dma_stat = 0x%02x\n",
+ drive->name, ide_dmafunc_verbose(func),
+ GET_STAT(), dma_stat);
+
+ if (dma_stat & 0xF0)
+			return ide_dma_timeout_recovery(drive);
+
+		printk("%s: %s: (restart_request) Let's do it again! "
+			"stat = 0x%02x, dma_stat = 0x%02x\n",
+ drive->name, ide_dmafunc_verbose(func),
+ GET_STAT(), dma_stat);
+
+ return restart_request(drive); // BUG: return types do not match!!
+//#else
+// return HWGROUP(drive)->handler(drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+ case ide_dma_retune:
+ case ide_dma_lostirq:
+ printk("ide_dmaproc: chipset supported %s func only: %d\n", ide_dmafunc_verbose(func), func);
+ return 1;
+ default:
+ printk("ide_dmaproc: unsupported %s func: %d\n", ide_dmafunc_verbose(func), func);
+ return 1;
+ }
+}
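+
+/*
+ * A reference sketch of the bus-master register block ide_dmaproc()
+ * programs, assuming the standard SFF-8038i layout at dma_base (one
+ * such block per channel):
+ */
+#if 0
+#define BM_COMMAND	0	/* bit 0: start/stop; bit 3: 1 = read (to memory) */
+#define BM_STATUS	2	/* bit 0: active, bit 1: error, bit 2: IRQ,
+				   bits 5/6: drive 0/1 DMA capable,
+				   bit 7: simplex-only controller */
+#define BM_PRD_PTR	4	/* 32-bit physical address of the PRD table */
+#endif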
+
+/*
+ * Needed to allow full modular support of the ide driver.
+ */
+int ide_release_dma (ide_hwif_t *hwif)
+{
+ if (hwif->dmatable_cpu) {
+ pci_free_consistent(hwif->pci_dev,
+ PRD_ENTRIES * PRD_BYTES,
+ hwif->dmatable_cpu,
+ hwif->dmatable_dma);
+ hwif->dmatable_cpu = NULL;
+ }
+ if (hwif->sg_table) {
+ kfree(hwif->sg_table);
+ hwif->sg_table = NULL;
+ }
+ if ((hwif->dma_extra) && (hwif->channel == 0))
+ release_region((hwif->dma_base + 16), hwif->dma_extra);
+ release_region(hwif->dma_base, 8);
+ return 1;
+}
+
+/*
+ * This can be called for a dynamically installed interface. Don't __init it
+ */
+
+void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_ports)
+{
+ printk(" %s: BM-DMA at 0x%04lx-0x%04lx", hwif->name, dma_base, dma_base + num_ports - 1);
+ if (check_region(dma_base, num_ports)) {
+ printk(" -- ERROR, PORT ADDRESSES ALREADY IN USE\n");
+ return;
+ }
+ request_region(dma_base, num_ports, hwif->name);
+ hwif->dma_base = dma_base;
+ hwif->dmatable_cpu = pci_alloc_consistent(hwif->pci_dev,
+ PRD_ENTRIES * PRD_BYTES,
+ &hwif->dmatable_dma);
+ if (hwif->dmatable_cpu == NULL)
+ goto dma_alloc_failure;
+
+ hwif->sg_table = kmalloc(sizeof(struct scatterlist) * PRD_ENTRIES,
+ GFP_KERNEL);
+ if (hwif->sg_table == NULL) {
+ pci_free_consistent(hwif->pci_dev, PRD_ENTRIES * PRD_BYTES,
+ hwif->dmatable_cpu, hwif->dmatable_dma);
+ goto dma_alloc_failure;
+ }
+
+ hwif->dmaproc = &ide_dmaproc;
+
+ if (hwif->chipset != ide_trm290) {
+ byte dma_stat = inb(dma_base+2);
+ printk(", BIOS settings: %s:%s, %s:%s",
+ hwif->drives[0].name, (dma_stat & 0x20) ? "DMA" : "pio",
+ hwif->drives[1].name, (dma_stat & 0x40) ? "DMA" : "pio");
+ }
+ printk("\n");
+ return;
+
+dma_alloc_failure:
+ printk(" -- ERROR, UNABLE TO ALLOCATE DMA TABLES\n");
+}
+
+/*
+ * Fetch the DMA Bus-Master-I/O-Base-Address (BMIBA) from PCI space:
+ */
+unsigned long __init ide_get_or_set_dma_base (ide_hwif_t *hwif, int extra, const char *name)
+{
+ unsigned long dma_base = 0;
+ struct pci_dev *dev = hwif->pci_dev;
+
+#ifdef CONFIG_BLK_DEV_IDEDMA_FORCED
+ int second_chance = 0;
+
+second_chance_to_dma:
+#endif /* CONFIG_BLK_DEV_IDEDMA_FORCED */
+
+ if (hwif->mate && hwif->mate->dma_base) {
+ dma_base = hwif->mate->dma_base - (hwif->channel ? 0 : 8);
+ } else {
+ dma_base = pci_resource_start(dev, 4);
+ if (!dma_base) {
+ printk("%s: dma_base is invalid (0x%04lx)\n", name, dma_base);
+ dma_base = 0;
+ }
+ }
+
+#ifdef CONFIG_BLK_DEV_IDEDMA_FORCED
+ if ((!dma_base) && (!second_chance)) {
+ unsigned long set_bmiba = 0;
+ second_chance++;
+ switch(dev->vendor) {
+ case PCI_VENDOR_ID_AL:
+ set_bmiba = DEFAULT_BMALIBA; break;
+ case PCI_VENDOR_ID_VIA:
+ set_bmiba = DEFAULT_BMCRBA; break;
+ case PCI_VENDOR_ID_INTEL:
+ set_bmiba = DEFAULT_BMIBA; break;
+ default:
+ return dma_base;
+ }
+ pci_write_config_dword(dev, 0x20, set_bmiba|1);
+ goto second_chance_to_dma;
+ }
+#endif /* CONFIG_BLK_DEV_IDEDMA_FORCED */
+
+ if (dma_base) {
+ if (extra) /* PDC20246, PDC20262, HPT343, & HPT366 */
+ request_region(dma_base+16, extra, name);
+ dma_base += hwif->channel ? 8 : 0;
+ hwif->dma_extra = extra;
+
+ switch(dev->device) {
+ case PCI_DEVICE_ID_AL_M5219:
+ case PCI_DEVICE_ID_AMD_VIPER_7409:
+ case PCI_DEVICE_ID_CMD_643:
+ outb(inb(dma_base+2) & 0x60, dma_base+2);
+ if (inb(dma_base+2) & 0x80) {
+ printk("%s: simplex device: DMA forced\n", name);
+ }
+ break;
+ default:
+ /*
+ * If the device claims "simplex" DMA,
+ * this means only one of the two interfaces
+ * can be trusted with DMA at any point in time.
+ * So we should enable DMA only on one of the
+ * two interfaces.
+ */
+ if ((inb(dma_base+2) & 0x80)) { /* simplex device? */
+ if ((!hwif->drives[0].present && !hwif->drives[1].present) ||
+ (hwif->mate && hwif->mate->dma_base)) {
+ printk("%s: simplex device: DMA disabled\n", name);
+ dma_base = 0;
+ }
+ }
+ }
+ }
+ return dma_base;
+}
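+
+/*
+ * Note on the "hwif->channel ? 8 : 0" adjustment above: BAR4 exposes a
+ * single 16-byte bus-master block shared by both channels -- primary at
+ * dma_base+0..7, secondary at dma_base+8..15 -- which is also why a
+ * mate's dma_base can be recovered by subtracting 8.
+ */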
diff --git a/xen/drivers/ide/ide-features.c b/xen/drivers/ide/ide-features.c
new file mode 100644
index 0000000000..a60af2dad3
--- /dev/null
+++ b/xen/drivers/ide/ide-features.c
@@ -0,0 +1,384 @@
+/*
+ * linux/drivers/block/ide-features.c Version 0.04 June 9, 2000
+ *
+ * Copyright (C) 1999-2000 Linus Torvalds & authors (see below)
+ *
+ * Copyright (C) 1999-2000 Andre Hedrick <andre@linux-ide.org>
+ *
+ *  Extracted from ide.c to address the evolving transfer rate code for
+ * the SETFEATURES_XFER callouts. Various parts of any given function
+ * are credited to previous ATA-IDE maintainers.
+ *
+ * Auto-CRC downgrade for Ultra DMA(ing)
+ *
+ * May be copied or modified under the terms of the GNU General Public License
+ */
+
+#include <xeno/config.h>
+#define __NO_VERSION__
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/blkpg.h>
+#include <xeno/slab.h>
+#include <xeno/pci.h>
+#include <xeno/delay.h>
+#include <xeno/hdreg.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+/*
+ * A verbose noise maker for debugging the attempted transfer rates.
+ */
+char *ide_xfer_verbose (byte xfer_rate)
+{
+ switch(xfer_rate) {
+ case XFER_UDMA_7: return("UDMA 7");
+ case XFER_UDMA_6: return("UDMA 6");
+ case XFER_UDMA_5: return("UDMA 5");
+ case XFER_UDMA_4: return("UDMA 4");
+ case XFER_UDMA_3: return("UDMA 3");
+ case XFER_UDMA_2: return("UDMA 2");
+ case XFER_UDMA_1: return("UDMA 1");
+ case XFER_UDMA_0: return("UDMA 0");
+ case XFER_MW_DMA_2: return("MW DMA 2");
+ case XFER_MW_DMA_1: return("MW DMA 1");
+ case XFER_MW_DMA_0: return("MW DMA 0");
+ case XFER_SW_DMA_2: return("SW DMA 2");
+ case XFER_SW_DMA_1: return("SW DMA 1");
+ case XFER_SW_DMA_0: return("SW DMA 0");
+ case XFER_PIO_4: return("PIO 4");
+ case XFER_PIO_3: return("PIO 3");
+ case XFER_PIO_2: return("PIO 2");
+ case XFER_PIO_1: return("PIO 1");
+ case XFER_PIO_0: return("PIO 0");
+ case XFER_PIO_SLOW: return("PIO SLOW");
+ default: return("XFER ERROR");
+ }
+}
+
+/*
+ * Describe the drive's media type for log output.
+ */
+char *ide_media_verbose (ide_drive_t *drive)
+{
+ switch (drive->media) {
+ case ide_scsi: return("scsi ");
+ case ide_disk: return("disk ");
+ case ide_optical: return("optical");
+ case ide_cdrom: return("cdrom ");
+ case ide_tape: return("tape ");
+ case ide_floppy: return("floppy ");
+ default: return("???????");
+ }
+}
+
+/*
+ * A verbose noise maker for debugging the attempted DMA calls.
+ */
+char *ide_dmafunc_verbose (ide_dma_action_t dmafunc)
+{
+ switch (dmafunc) {
+ case ide_dma_read: return("ide_dma_read");
+ case ide_dma_write: return("ide_dma_write");
+ case ide_dma_begin: return("ide_dma_begin");
+		case ide_dma_end:	return("ide_dma_end");
+ case ide_dma_check: return("ide_dma_check");
+ case ide_dma_on: return("ide_dma_on");
+ case ide_dma_off: return("ide_dma_off");
+ case ide_dma_off_quietly: return("ide_dma_off_quietly");
+ case ide_dma_test_irq: return("ide_dma_test_irq");
+ case ide_dma_bad_drive: return("ide_dma_bad_drive");
+ case ide_dma_good_drive: return("ide_dma_good_drive");
+ case ide_dma_verbose: return("ide_dma_verbose");
+ case ide_dma_retune: return("ide_dma_retune");
+ case ide_dma_lostirq: return("ide_dma_lostirq");
+ case ide_dma_timeout: return("ide_dma_timeout");
+ default: return("unknown");
+ }
+}
+
+/*
+ * Drop to the next-lower UDMA mode after repeated CRC errors.
+ */
+byte ide_auto_reduce_xfer (ide_drive_t *drive)
+{
+ if (!drive->crc_count)
+ return drive->current_speed;
+ drive->crc_count = 0;
+
+ switch(drive->current_speed) {
+ case XFER_UDMA_7: return XFER_UDMA_6;
+ case XFER_UDMA_6: return XFER_UDMA_5;
+ case XFER_UDMA_5: return XFER_UDMA_4;
+ case XFER_UDMA_4: return XFER_UDMA_3;
+ case XFER_UDMA_3: return XFER_UDMA_2;
+ case XFER_UDMA_2: return XFER_UDMA_1;
+ case XFER_UDMA_1: return XFER_UDMA_0;
+		/*
+		 * We do not drop into non-Ultra DMA modes here:
+		 * without iCRC available we force the system to
+		 * PIO and make the user invoke the ATA-1/ATA-2
+		 * DMA modes explicitly.
+		 */
+ case XFER_UDMA_0:
+ default: return XFER_PIO_4;
+ }
+}
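+
+/*
+ * A minimal sketch of the auto-CRC downgrade path (hypothetical caller,
+ * not taken from this file): once CRC errors accumulate, pick the
+ * next-lower mode and program it via ide_config_drive_speed() below.
+ */
+#if 0
+static void crc_downgrade(ide_drive_t *drive)
+{
+	byte new_speed = ide_auto_reduce_xfer(drive);
+
+	if (new_speed != drive->current_speed &&
+	    !ide_config_drive_speed(drive, new_speed))
+		drive->current_speed = new_speed;
+}
+#endif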
+
+/*
+ * Re-read the drive's IDENTIFY data and update its DMA mode fields.
+ */
+int ide_driveid_update (ide_drive_t *drive)
+{
+ /*
+ * Re-read drive->id for possible DMA mode
+ * change (copied from ide-probe.c)
+ */
+ struct hd_driveid *id;
+ unsigned long timeout, flags;
+
+ SELECT_MASK(HWIF(drive), drive, 1);
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+ ide_delay_50ms();
+ OUT_BYTE(WIN_IDENTIFY, IDE_COMMAND_REG);
+ timeout = jiffies + WAIT_WORSTCASE;
+ do {
+ if (0 < (signed long)(jiffies - timeout)) {
+ SELECT_MASK(HWIF(drive), drive, 0);
+ return 0; /* drive timed-out */
+ }
+ ide_delay_50ms(); /* give drive a breather */
+ } while (IN_BYTE(IDE_ALTSTATUS_REG) & BUSY_STAT);
+ ide_delay_50ms(); /* wait for IRQ and DRQ_STAT */
+ if (!OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) {
+ SELECT_MASK(HWIF(drive), drive, 0);
+ printk("%s: CHECK for good STATUS\n", drive->name);
+ return 0;
+ }
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only; some systems need this */
+ SELECT_MASK(HWIF(drive), drive, 0);
+ id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC);
+ if (!id) {
+ __restore_flags(flags); /* local CPU only */
+ return 0;
+ }
+ ide_input_data(drive, id, SECTOR_WORDS);
+ (void) GET_STAT(); /* clear drive IRQ */
+ ide__sti(); /* local CPU only */
+ __restore_flags(flags); /* local CPU only */
+ ide_fix_driveid(id);
+ if (id) {
+ drive->id->dma_ultra = id->dma_ultra;
+ drive->id->dma_mword = id->dma_mword;
+ drive->id->dma_1word = id->dma_1word;
+ /* anything more ? */
+ kfree(id);
+ }
+
+ return 1;
+}
+
+/*
+ * Verify that we are doing an approved SETFEATURES_XFER with respect
+ * to the hardware being able to support the request.  Since some
+ * hardware can improperly report capabilities, we check whether the
+ * host adapter in combination with the device (usually a disk)
+ * properly detects and acknowledges each end of the ribbon.
+ */
+int ide_ata66_check (ide_drive_t *drive, ide_task_t *args)
+{
+ if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) &&
+ (args->tfRegister[IDE_SECTOR_OFFSET] > XFER_UDMA_2) &&
+ (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER)) {
+ if (!HWIF(drive)->udma_four) {
+			printk("%s: Speed warning: UDMA 3/4/5 is not functional.\n", HWIF(drive)->name);
+ return 1;
+ }
+#ifndef CONFIG_IDEDMA_IVB
+ if ((drive->id->hw_config & 0x6000) == 0) {
+#else /* !CONFIG_IDEDMA_IVB */
+ if (((drive->id->hw_config & 0x2000) == 0) ||
+ ((drive->id->hw_config & 0x4000) == 0)) {
+#endif /* CONFIG_IDEDMA_IVB */
+		printk("%s: Speed warning: UDMA 3/4/5 is not functional.\n", drive->name);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Backside of HDIO_DRIVE_CMD call of SETFEATURES_XFER.
+ * 1 : Safe to update drive->id DMA registers.
+ * 0 : OOPs not allowed.
+ */
+int set_transfer (ide_drive_t *drive, ide_task_t *args)
+{
+ if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) &&
+ (args->tfRegister[IDE_SECTOR_OFFSET] >= XFER_SW_DMA_0) &&
+ (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER) &&
+ (drive->id->dma_ultra ||
+ drive->id->dma_mword ||
+ drive->id->dma_1word))
+ return 1;
+
+ return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+/*
+ * All hosts that use the 80-conductor ribbon must use this check!
+ */
+byte eighty_ninty_three (ide_drive_t *drive)
+{
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ if (HWIF(drive)->pci_devid.vid==0x105a)
+ return(HWIF(drive)->udma_four);
+#endif
+	/* PDC202XX: trust udma_four above, since some drives return wrong cable info */
+ return ((byte) ((HWIF(drive)->udma_four) &&
+#ifndef CONFIG_IDEDMA_IVB
+ (drive->id->hw_config & 0x4000) &&
+#endif /* CONFIG_IDEDMA_IVB */
+ (drive->id->hw_config & 0x6000)) ? 1 : 0);
+}
+#endif // CONFIG_BLK_DEV_IDEDMA
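+
+/*
+ * The hw_config masks used in eighty_ninty_three() come from IDENTIFY
+ * word 93: bits 13 (0x2000) and 14 (0x4000) carry the device-side
+ * 80-wire cable detection (drives disagree on which bit is valid,
+ * hence CONFIG_IDEDMA_IVB), so 0x6000 tests both at once.
+ */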
+
+/*
+ * Similar to ide_wait_stat(), except it never calls ide_error internally.
+ * This is a kludge to handle the new ide_config_drive_speed() function,
+ * and should not otherwise be used anywhere. Eventually, the tuneproc's
+ * should be updated to return ide_startstop_t, in which case we can get
+ * rid of this abomination again. :) -ml
+ *
+ * It is gone..........
+ *
+ * const char *msg == consider adding for verbose errors.
+ */
+int ide_config_drive_speed (ide_drive_t *drive, byte speed)
+{
+ ide_hwif_t *hwif = HWIF(drive);
+ int i, error = 1;
+ byte stat;
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+ byte unit = (drive->select.b.unit & 0x01);
+ outb(inb(hwif->dma_base+2) & ~(1<<(5+unit)), hwif->dma_base+2);
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
+ /*
+ * Don't use ide_wait_cmd here - it will
+ * attempt to set_geometry and recalibrate,
+ * but for some reason these don't work at
+ * this point (lost interrupt).
+ */
+ /*
+ * Select the drive, and issue the SETFEATURES command
+ */
+ disable_irq(hwif->irq); /* disable_irq_nosync ?? */
+ udelay(1);
+ SELECT_DRIVE(HWIF(drive), drive);
+ SELECT_MASK(HWIF(drive), drive, 0);
+ udelay(1);
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl | 2, IDE_CONTROL_REG);
+ OUT_BYTE(speed, IDE_NSECTOR_REG);
+ OUT_BYTE(SETFEATURES_XFER, IDE_FEATURE_REG);
+ OUT_BYTE(WIN_SETFEATURES, IDE_COMMAND_REG);
+ if ((IDE_CONTROL_REG) && (drive->quirk_list == 2))
+ OUT_BYTE(drive->ctl, IDE_CONTROL_REG);
+ udelay(1);
+ /*
+ * Wait for drive to become non-BUSY
+ */
+ if ((stat = GET_STAT()) & BUSY_STAT) {
+ unsigned long flags, timeout;
+ __save_flags(flags); /* local CPU only */
+ ide__sti(); /* local CPU only -- for jiffies */
+ timeout = jiffies + WAIT_CMD;
+ while ((stat = GET_STAT()) & BUSY_STAT) {
+ if (0 < (signed long)(jiffies - timeout))
+ break;
+ }
+ __restore_flags(flags); /* local CPU only */
+ }
+
+ /*
+ * Allow status to settle, then read it again.
+ * A few rare drives vastly violate the 400ns spec here,
+ * so we'll wait up to 10usec for a "good" status
+ * rather than expensively fail things immediately.
+ * This fix courtesy of Matthew Faupel & Niccolo Rigacci.
+ */
+ for (i = 0; i < 10; i++) {
+ udelay(1);
+ if (OK_STAT((stat = GET_STAT()), DRIVE_READY, BUSY_STAT|DRQ_STAT|ERR_STAT)) {
+ error = 0;
+ break;
+ }
+ }
+
+ SELECT_MASK(HWIF(drive), drive, 0);
+
+ enable_irq(hwif->irq);
+
+ if (error) {
+ (void) ide_dump_status(drive, "set_drive_speed_status", stat);
+ return error;
+ }
+
+ drive->id->dma_ultra &= ~0xFF00;
+ drive->id->dma_mword &= ~0x0F00;
+ drive->id->dma_1word &= ~0x0F00;
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+ if (speed > XFER_PIO_4) {
+ outb(inb(hwif->dma_base+2)|(1<<(5+unit)), hwif->dma_base+2);
+ } else {
+ outb(inb(hwif->dma_base+2) & ~(1<<(5+unit)), hwif->dma_base+2);
+ }
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
+ switch(speed) {
+ case XFER_UDMA_7: drive->id->dma_ultra |= 0x8080; break;
+ case XFER_UDMA_6: drive->id->dma_ultra |= 0x4040; break;
+ case XFER_UDMA_5: drive->id->dma_ultra |= 0x2020; break;
+ case XFER_UDMA_4: drive->id->dma_ultra |= 0x1010; break;
+ case XFER_UDMA_3: drive->id->dma_ultra |= 0x0808; break;
+ case XFER_UDMA_2: drive->id->dma_ultra |= 0x0404; break;
+ case XFER_UDMA_1: drive->id->dma_ultra |= 0x0202; break;
+ case XFER_UDMA_0: drive->id->dma_ultra |= 0x0101; break;
+ case XFER_MW_DMA_2: drive->id->dma_mword |= 0x0404; break;
+ case XFER_MW_DMA_1: drive->id->dma_mword |= 0x0202; break;
+ case XFER_MW_DMA_0: drive->id->dma_mword |= 0x0101; break;
+ case XFER_SW_DMA_2: drive->id->dma_1word |= 0x0404; break;
+ case XFER_SW_DMA_1: drive->id->dma_1word |= 0x0202; break;
+ case XFER_SW_DMA_0: drive->id->dma_1word |= 0x0101; break;
+ default: break;
+ }
+ return error;
+}
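+
+/*
+ * The register protocol above is the standard ATA SET FEATURES
+ * transfer-mode sequence: Features = SETFEATURES_XFER (subcommand
+ * 0x03), Sector Count = the XFER_* mode value, Command =
+ * WIN_SETFEATURES (0xEF).  The dma_ultra/dma_mword/dma_1word fixups
+ * afterwards mirror what the drive would now report in a fresh
+ * IDENTIFY.
+ */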
+
+EXPORT_SYMBOL(ide_auto_reduce_xfer);
+EXPORT_SYMBOL(ide_driveid_update);
+EXPORT_SYMBOL(ide_ata66_check);
+EXPORT_SYMBOL(set_transfer);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+EXPORT_SYMBOL(eighty_ninty_three);
+#endif // CONFIG_BLK_DEV_IDEDMA
+EXPORT_SYMBOL(ide_config_drive_speed);
+
diff --git a/xen/drivers/ide/ide-geometry.c b/xen/drivers/ide/ide-geometry.c
new file mode 100644
index 0000000000..22428288b6
--- /dev/null
+++ b/xen/drivers/ide/ide-geometry.c
@@ -0,0 +1,222 @@
+/*
+ * linux/drivers/ide/ide-geometry.c
+ */
+#include <xeno/config.h>
+#include <xeno/ide.h>
+#include <asm/mc146818rtc.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_BLK_DEV_IDE
+
+/*
+ * We query CMOS about hard disks : it could be that we have a SCSI/ESDI/etc
+ * controller that is BIOS compatible with ST-506, and thus showing up in our
+ * BIOS table, but not register compatible, and therefore not present in CMOS.
+ *
+ * Furthermore, we will assume that our ST-506 drives <if any> are the primary
+ * drives in the system -- the ones reflected as drive 1 or 2. The first
+ * drive is stored in the high nibble of CMOS byte 0x12, the second in the low
+ * nibble. This will be either a 4 bit drive type or 0xf indicating use byte
+ * 0x19 for an 8 bit type, drive 1, 0x1a for drive 2 in CMOS. A non-zero value
+ * means we have an AT controller hard disk for that drive.
+ *
+ * Of course, there is no guarantee that either drive is actually on the
+ * "primary" IDE interface, but we don't bother trying to sort that out here.
+ * If a drive is not actually on the primary interface, then these parameters
+ * will be ignored. This results in the user having to supply the logical
+ * drive geometry as a boot parameter for each drive not on the primary i/f.
+ */
+/*
+ * The only "perfect" way to handle this would be to modify the setup.[cS] code
+ * to do BIOS calls Int13h/Fn08h and Int13h/Fn48h to get all of the drive info
+ * for us during initialization. I have the necessary docs -- any takers? -ml
+ */
+/*
+ * I did this, but it doesn't work - there is no reasonable way to find the
+ * correspondence between the BIOS numbering of the disks and the Linux
+ * numbering. -aeb
+ *
+ * The code below is bad. One of the problems is that drives 1 and 2
+ * may be SCSI disks (even when IDE disks are present), so that
+ * the geometry we read here from BIOS is attributed to the wrong disks.
+ * Consequently, also the former "drive->present = 1" below was a mistake.
+ *
+ * Eventually the entire routine below should be removed.
+ *
+ * 17-OCT-2000 rjohnson@analogic.com Added spin-locks for reading CMOS
+ * chip.
+ */
+
+void probe_cmos_for_drives (ide_hwif_t *hwif)
+{
+#ifdef __i386__
+ extern struct drive_info_struct drive_info;
+ byte cmos_disks, *BIOS = (byte *) &drive_info;
+ int unit;
+ unsigned long flags;
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (hwif->chipset == ide_pdc4030 && hwif->channel != 0)
+ return;
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ spin_lock_irqsave(&rtc_lock, flags);
+ cmos_disks = CMOS_READ(0x12);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ /* Extract drive geometry from CMOS+BIOS if not already setup */
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ if ((cmos_disks & (0xf0 >> (unit*4)))
+ && !drive->present && !drive->nobios) {
+ unsigned short cyl = *(unsigned short *)BIOS;
+ unsigned char head = *(BIOS+2);
+ unsigned char sect = *(BIOS+14);
+ if (cyl > 0 && head > 0 && sect > 0 && sect < 64) {
+ drive->cyl = drive->bios_cyl = cyl;
+ drive->head = drive->bios_head = head;
+ drive->sect = drive->bios_sect = sect;
+ drive->ctl = *(BIOS+8);
+ } else {
+ printk("hd%c: C/H/S=%d/%d/%d from BIOS ignored\n",
+ unit+'a', cyl, head, sect);
+ }
+ }
+ BIOS += 16;
+ }
+#endif
+}
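+
+/*
+ * Worked example for the CMOS decode above: CMOS_READ(0x12) == 0xf0
+ * means drive 0 carries type 0xf in the high nibble ("see byte 0x19
+ * for the 8-bit type") and drive 1 is absent, so only unit 0 passes
+ * the (cmos_disks & 0xf0) test and has its C/H/S taken from the BIOS
+ * table.
+ */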
+#endif /* CONFIG_BLK_DEV_IDE */
+
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+
+extern ide_drive_t * get_info_ptr(kdev_t);
+extern unsigned long current_capacity (ide_drive_t *);
+
+/*
+ * If heads is nonzero: find a translation with this many heads and S=63.
+ * Otherwise: find out how OnTrack Disk Manager would translate the disk.
+ */
+static void
+ontrack(ide_drive_t *drive, int heads, unsigned int *c, int *h, int *s) {
+ static const byte dm_head_vals[] = {4, 8, 16, 32, 64, 128, 255, 0};
+ const byte *headp = dm_head_vals;
+ unsigned long total;
+
+ /*
+ * The specs say: take geometry as obtained from Identify,
+ * compute total capacity C*H*S from that, and truncate to
+ * 1024*255*63. Now take S=63, H the first in the sequence
+ * 4, 8, 16, 32, 64, 128, 255 such that 63*H*1024 >= total.
+ * [Please tell aeb@cwi.nl in case this computes a
+ * geometry different from what OnTrack uses.]
+ */
+ total = DRIVER(drive)->capacity(drive);
+
+ *s = 63;
+
+ if (heads) {
+ *h = heads;
+ *c = total / (63 * heads);
+ return;
+ }
+
+ while (63 * headp[0] * 1024 < total && headp[1] != 0)
+ headp++;
+ *h = headp[0];
+ *c = total / (63 * headp[0]);
+}
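+
+/*
+ * Worked example for the head-selection loop above: for a disk of
+ * 4,900,000 sectors (~2.5 GB), 63*64*1024 = 4,128,768 is still below
+ * the total but 63*128*1024 = 8,257,536 is not, so H = 128, S = 63
+ * and C = 4900000 / (63 * 128) = 607 cylinders.
+ */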
+
+/*
+ * This routine is called from the partition-table code in pt/msdos.c.
+ * It has two tasks:
+ * (i) to handle Ontrack DiskManager by offsetting everything by 63 sectors,
+ * or to handle EZdrive by remapping sector 0 to sector 1.
+ * (ii) to invent a translated geometry.
+ * Part (i) is suppressed if the user specifies the "noremap" option
+ * on the command line.
+ * Part (ii) is suppressed if the user specifies an explicit geometry.
+ *
+ * The ptheads parameter is either 0 or tells about the number of
+ * heads shown by the end of the first nonempty partition.
+ * If this is either 16, 32, 64, 128, 240 or 255 we'll believe it.
+ *
+ * The xparm parameter has the following meaning:
+ * 0 = convert to CHS with fewer than 1024 cyls
+ * using the same method as Ontrack DiskManager.
+ * 1 = same as "0", plus offset everything by 63 sectors.
+ * -1 = similar to "0", plus redirect sector 0 to sector 1.
+ * 2 = convert to a CHS geometry with "ptheads" heads.
+ *
+ * Returns 0 if the translation was not possible, if the device was not
+ * an IDE disk drive, or if a geometry was "forced" on the commandline.
+ * Returns 1 if the geometry translation was successful.
+ */
+int ide_xlate_1024 (kdev_t i_rdev, int xparm, int ptheads, const char *msg)
+{
+ ide_drive_t *drive;
+ const char *msg1 = "";
+ int heads = 0;
+ int c, h, s;
+ int transl = 1; /* try translation */
+ int ret = 0;
+
+ drive = get_info_ptr(i_rdev);
+ if (!drive)
+ return 0;
+
+ /* remap? */
+ if (drive->remap_0_to_1 != 2) {
+ if (xparm == 1) { /* DM */
+ drive->sect0 = 63;
+ msg1 = " [remap +63]";
+ ret = 1;
+ } else if (xparm == -1) { /* EZ-Drive */
+ if (drive->remap_0_to_1 == 0) {
+ drive->remap_0_to_1 = 1;
+ msg1 = " [remap 0->1]";
+ ret = 1;
+ }
+ }
+ }
+
+ /* There used to be code here that assigned drive->id->CHS
+ to drive->CHS and that to drive->bios_CHS. However,
+ some disks have id->C/H/S = 4092/16/63 but are larger than 2.1 GB.
+ In such cases that code was wrong. Moreover,
+ there seems to be no reason to do any of these things. */
+
+ /* translate? */
+ if (drive->forced_geom)
+ transl = 0;
+
+ /* does ptheads look reasonable? */
+ if (ptheads == 32 || ptheads == 64 || ptheads == 128 ||
+ ptheads == 240 || ptheads == 255)
+ heads = ptheads;
+
+ if (xparm == 2) {
+ if (!heads ||
+ (drive->bios_head >= heads && drive->bios_sect == 63))
+ transl = 0;
+ }
+ if (xparm == -1) {
+ if (drive->bios_head > 16)
+ transl = 0; /* we already have a translation */
+ }
+
+ if (transl) {
+ ontrack(drive, heads, &c, &h, &s);
+ drive->bios_cyl = c;
+ drive->bios_head = h;
+ drive->bios_sect = s;
+ ret = 1;
+ }
+
+ drive->part[0].nr_sects = current_capacity(drive);
+
+ if (ret)
+ printk("%s%s [%d/%d/%d]", msg, msg1,
+ drive->bios_cyl, drive->bios_head, drive->bios_sect);
+ return ret;
+}
+#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */
diff --git a/xen/drivers/ide/ide-pci.c b/xen/drivers/ide/ide-pci.c
new file mode 100644
index 0000000000..c8784c6f3d
--- /dev/null
+++ b/xen/drivers/ide/ide-pci.c
@@ -0,0 +1,1016 @@
+/*
+ * linux/drivers/ide/ide-pci.c Version 1.05 June 9, 2000
+ *
+ * Copyright (c) 1998-2000 Andre Hedrick <andre@linux-ide.org>
+ *
+ * Copyright (c) 1995-1998 Mark Lord
+ * May be copied or modified under the terms of the GNU General Public License
+ */
+
+/*
+ * This module provides support for automatic detection and
+ * configuration of all PCI IDE interfaces present in a system.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ide.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#define DEVID_PIIXa ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_0})
+#define DEVID_PIIXb ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_1})
+#define DEVID_MPIIX ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371MX})
+#define DEVID_PIIX3 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_1})
+#define DEVID_PIIX4 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB})
+#define DEVID_ICH0 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_1})
+#define DEVID_PIIX4E2 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_1})
+#define DEVID_ICH ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_1})
+#define DEVID_PIIX4U2 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82372FB_1})
+#define DEVID_PIIX4NX ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX})
+#define DEVID_ICH2 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_9})
+#define DEVID_ICH2M ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_8})
+#define DEVID_ICH3M ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10})
+#define DEVID_ICH3 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_11})
+#define DEVID_ICH4 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_11})
+#define DEVID_CICH ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801E_11})
+#define DEVID_VIA_IDE ((ide_pci_devid_t){PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C561})
+#define DEVID_MR_IDE ((ide_pci_devid_t){PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576_1})
+#define DEVID_VP_IDE ((ide_pci_devid_t){PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1})
+#define DEVID_PDC20246 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20246})
+#define DEVID_PDC20262 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20262})
+#define DEVID_PDC20265 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20265})
+#define DEVID_PDC20267 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20267})
+#define DEVID_PDC20268 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20268})
+#define DEVID_PDC20270 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20270})
+#define DEVID_PDC20269 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20269})
+#define DEVID_PDC20275 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20275})
+#define DEVID_PDC20276 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20276})
+#define DEVID_RZ1000 ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1000})
+#define DEVID_RZ1001 ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1001})
+#define DEVID_SAMURAI ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE})
+#define DEVID_CMD640 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_640})
+#define DEVID_CMD643 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_643})
+#define DEVID_CMD646 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_646})
+#define DEVID_CMD648 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_648})
+#define DEVID_CMD649 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_649})
+#define DEVID_CMD680 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_680})
+#define DEVID_SIS5513 ((ide_pci_devid_t){PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513})
+#define DEVID_OPTI621 ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C621})
+#define DEVID_OPTI621V ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558})
+#define DEVID_OPTI621X ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C825})
+#define DEVID_TRM290 ((ide_pci_devid_t){PCI_VENDOR_ID_TEKRAM, PCI_DEVICE_ID_TEKRAM_DC290})
+#define DEVID_NS87410 ((ide_pci_devid_t){PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_87410})
+#define DEVID_NS87415 ((ide_pci_devid_t){PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_87415})
+#define DEVID_HT6565 ((ide_pci_devid_t){PCI_VENDOR_ID_HOLTEK, PCI_DEVICE_ID_HOLTEK_6565})
+#define DEVID_AEC6210 ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP, PCI_DEVICE_ID_ARTOP_ATP850UF})
+#define DEVID_AEC6260 ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP, PCI_DEVICE_ID_ARTOP_ATP860})
+#define DEVID_AEC6260R ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP, PCI_DEVICE_ID_ARTOP_ATP860R})
+#define DEVID_W82C105 ((ide_pci_devid_t){PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105})
+#define DEVID_UM8673F ((ide_pci_devid_t){PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8673F})
+#define DEVID_UM8886A ((ide_pci_devid_t){PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886A})
+#define DEVID_UM8886BF ((ide_pci_devid_t){PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF})
+#define DEVID_HPT34X ((ide_pci_devid_t){PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT343})
+#define DEVID_HPT366 ((ide_pci_devid_t){PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT366})
+#define DEVID_ALI15X3 ((ide_pci_devid_t){PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M5229})
+#define DEVID_CY82C693 ((ide_pci_devid_t){PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693})
+#define DEVID_HINT ((ide_pci_devid_t){0x3388, 0x8013})
+#define DEVID_CS5530 ((ide_pci_devid_t){PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_IDE})
+#define DEVID_AMD7401 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_COBRA_7401})
+#define DEVID_AMD7409 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7409})
+#define DEVID_AMD7411 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7411})
+#define DEVID_AMD7441 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7441})
+#define DEVID_PDCADMA ((ide_pci_devid_t){PCI_VENDOR_ID_PDC, PCI_DEVICE_ID_PDC_1841})
+#define DEVID_SLC90E66 ((ide_pci_devid_t){PCI_VENDOR_ID_EFAR, PCI_DEVICE_ID_EFAR_SLC90E66_1})
+#define DEVID_OSB4 ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4IDE})
+#define DEVID_CSB5 ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE})
+#define DEVID_ITE8172G ((ide_pci_devid_t){PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_IT8172G})
+
+#define IDE_IGNORE ((void *)-1)
+#define IDE_NO_DRIVER ((void *)-2)
+
+#ifdef CONFIG_BLK_DEV_AEC62XX
+extern unsigned int pci_init_aec62xx(struct pci_dev *, const char *);
+extern unsigned int ata66_aec62xx(ide_hwif_t *);
+extern void ide_init_aec62xx(ide_hwif_t *);
+extern void ide_dmacapable_aec62xx(ide_hwif_t *, unsigned long);
+#define PCI_AEC62XX &pci_init_aec62xx
+#define ATA66_AEC62XX &ata66_aec62xx
+#define INIT_AEC62XX &ide_init_aec62xx
+#define DMA_AEC62XX &ide_dmacapable_aec62xx
+#else
+#define PCI_AEC62XX NULL
+#define ATA66_AEC62XX NULL
+#define INIT_AEC62XX IDE_NO_DRIVER
+#define DMA_AEC62XX NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_ALI15X3
+extern unsigned int pci_init_ali15x3(struct pci_dev *, const char *);
+extern unsigned int ata66_ali15x3(ide_hwif_t *);
+extern void ide_init_ali15x3(ide_hwif_t *);
+extern void ide_dmacapable_ali15x3(ide_hwif_t *, unsigned long);
+#define PCI_ALI15X3 &pci_init_ali15x3
+#define ATA66_ALI15X3 &ata66_ali15x3
+#define INIT_ALI15X3 &ide_init_ali15x3
+#define DMA_ALI15X3 &ide_dmacapable_ali15x3
+#else
+#define PCI_ALI15X3 NULL
+#define ATA66_ALI15X3 NULL
+#define INIT_ALI15X3 IDE_NO_DRIVER
+#define DMA_ALI15X3 NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_AMD74XX
+extern unsigned int pci_init_amd74xx(struct pci_dev *, const char *);
+extern unsigned int ata66_amd74xx(ide_hwif_t *);
+extern void ide_init_amd74xx(ide_hwif_t *);
+extern void ide_dmacapable_amd74xx(ide_hwif_t *, unsigned long);
+#define PCI_AMD74XX &pci_init_amd74xx
+#define ATA66_AMD74XX &ata66_amd74xx
+#define INIT_AMD74XX &ide_init_amd74xx
+#define DMA_AMD74XX &ide_dmacapable_amd74xx
+#else
+#define PCI_AMD74XX NULL
+#define ATA66_AMD74XX NULL
+#define INIT_AMD74XX IDE_NO_DRIVER
+#define DMA_AMD74XX NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_CMD64X
+extern unsigned int pci_init_cmd64x(struct pci_dev *, const char *);
+extern unsigned int ata66_cmd64x(ide_hwif_t *);
+extern void ide_init_cmd64x(ide_hwif_t *);
+extern void ide_dmacapable_cmd64x(ide_hwif_t *, unsigned long);
+#define PCI_CMD64X &pci_init_cmd64x
+#define ATA66_CMD64X &ata66_cmd64x
+#define INIT_CMD64X &ide_init_cmd64x
+#else
+#define PCI_CMD64X NULL
+#define ATA66_CMD64X NULL
+#ifdef __sparc_v9__
+#define INIT_CMD64X IDE_IGNORE
+#else
+#define INIT_CMD64X IDE_NO_DRIVER
+#endif
+#endif
+
+#ifdef CONFIG_BLK_DEV_CY82C693
+extern unsigned int pci_init_cy82c693(struct pci_dev *, const char *);
+extern void ide_init_cy82c693(ide_hwif_t *);
+#define PCI_CY82C693 &pci_init_cy82c693
+#define INIT_CY82C693 &ide_init_cy82c693
+#else
+#define PCI_CY82C693 NULL
+#define INIT_CY82C693 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_CS5530
+extern unsigned int pci_init_cs5530(struct pci_dev *, const char *);
+extern void ide_init_cs5530(ide_hwif_t *);
+#define PCI_CS5530 &pci_init_cs5530
+#define INIT_CS5530 &ide_init_cs5530
+#else
+#define PCI_CS5530 NULL
+#define INIT_CS5530 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_HPT34X
+extern unsigned int pci_init_hpt34x(struct pci_dev *, const char *);
+extern void ide_init_hpt34x(ide_hwif_t *);
+#define PCI_HPT34X &pci_init_hpt34x
+#define INIT_HPT34X &ide_init_hpt34x
+#else
+#define PCI_HPT34X NULL
+#define INIT_HPT34X IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_HPT366
+extern byte hpt363_shared_irq;
+extern byte hpt363_shared_pin;
+extern unsigned int pci_init_hpt366(struct pci_dev *, const char *);
+extern unsigned int ata66_hpt366(ide_hwif_t *);
+extern void ide_init_hpt366(ide_hwif_t *);
+extern void ide_dmacapable_hpt366(ide_hwif_t *, unsigned long);
+#define PCI_HPT366 &pci_init_hpt366
+#define ATA66_HPT366 &ata66_hpt366
+#define INIT_HPT366 &ide_init_hpt366
+#define DMA_HPT366 &ide_dmacapable_hpt366
+#else
+static byte hpt363_shared_irq;
+static byte hpt363_shared_pin;
+#define PCI_HPT366 NULL
+#define ATA66_HPT366 NULL
+#define INIT_HPT366 IDE_NO_DRIVER
+#define DMA_HPT366 NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_NS87415
+extern void ide_init_ns87415(ide_hwif_t *);
+#define INIT_NS87415 &ide_init_ns87415
+#else
+#define INIT_NS87415 IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_OPTI621
+extern void ide_init_opti621(ide_hwif_t *);
+#define INIT_OPTI621 &ide_init_opti621
+#else
+#define INIT_OPTI621 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_PDC_ADMA
+extern unsigned int pci_init_pdcadma(struct pci_dev *, const char *);
+extern unsigned int ata66_pdcadma(ide_hwif_t *);
+extern void ide_init_pdcadma(ide_hwif_t *);
+extern void ide_dmacapable_pdcadma(ide_hwif_t *, unsigned long);
+#define PCI_PDCADMA &pci_init_pdcadma
+#define ATA66_PDCADMA &ata66_pdcadma
+#define INIT_PDCADMA &ide_init_pdcadma
+#define DMA_PDCADMA &ide_dmacapable_pdcadma
+#else
+#define PCI_PDCADMA IDE_IGNORE
+#define ATA66_PDCADMA IDE_IGNORE
+#define INIT_PDCADMA IDE_IGNORE
+#define DMA_PDCADMA IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_PDC202XX
+extern unsigned int pci_init_pdc202xx(struct pci_dev *, const char *);
+extern unsigned int ata66_pdc202xx(ide_hwif_t *);
+extern void ide_init_pdc202xx(ide_hwif_t *);
+#define PCI_PDC202XX &pci_init_pdc202xx
+#define ATA66_PDC202XX &ata66_pdc202xx
+#define INIT_PDC202XX &ide_init_pdc202xx
+#else
+#define PCI_PDC202XX NULL
+#define ATA66_PDC202XX NULL
+#define INIT_PDC202XX NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_PIIX
+extern unsigned int pci_init_piix(struct pci_dev *, const char *);
+extern unsigned int ata66_piix(ide_hwif_t *);
+extern void ide_init_piix(ide_hwif_t *);
+#define PCI_PIIX &pci_init_piix
+#define ATA66_PIIX &ata66_piix
+#define INIT_PIIX &ide_init_piix
+#else
+#define PCI_PIIX NULL
+#define ATA66_PIIX NULL
+#define INIT_PIIX IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_IT8172
+extern unsigned int pci_init_it8172(struct pci_dev *, const char *);
+extern unsigned int ata66_it8172(ide_hwif_t *);
+extern void ide_init_it8172(ide_hwif_t *);
+#define PCI_IT8172 &pci_init_it8172
+#define INIT_IT8172 &ide_init_it8172
+#else
+#define PCI_IT8172 NULL
+#define ATA66_IT8172 NULL
+#define INIT_IT8172 NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_RZ1000
+extern void ide_init_rz1000(ide_hwif_t *);
+#define INIT_RZ1000 &ide_init_rz1000
+#else
+#define INIT_RZ1000 IDE_IGNORE
+#endif
+
+#define INIT_SAMURAI NULL
+
+#ifdef CONFIG_BLK_DEV_SVWKS
+extern unsigned int pci_init_svwks(struct pci_dev *, const char *);
+extern unsigned int ata66_svwks(ide_hwif_t *);
+extern void ide_init_svwks(ide_hwif_t *);
+#define PCI_SVWKS &pci_init_svwks
+#define ATA66_SVWKS &ata66_svwks
+#define INIT_SVWKS &ide_init_svwks
+#else
+#define PCI_SVWKS NULL
+#define ATA66_SVWKS NULL
+#define INIT_SVWKS IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_SIS5513
+extern unsigned int pci_init_sis5513(struct pci_dev *, const char *);
+extern unsigned int ata66_sis5513(ide_hwif_t *);
+extern void ide_init_sis5513(ide_hwif_t *);
+#define PCI_SIS5513 &pci_init_sis5513
+#define ATA66_SIS5513 &ata66_sis5513
+#define INIT_SIS5513 &ide_init_sis5513
+#else
+#define PCI_SIS5513 NULL
+#define ATA66_SIS5513 NULL
+#define INIT_SIS5513 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_SLC90E66
+extern unsigned int pci_init_slc90e66(struct pci_dev *, const char *);
+extern unsigned int ata66_slc90e66(ide_hwif_t *);
+extern void ide_init_slc90e66(ide_hwif_t *);
+#define PCI_SLC90E66 &pci_init_slc90e66
+#define ATA66_SLC90E66 &ata66_slc90e66
+#define INIT_SLC90E66 &ide_init_slc90e66
+#else
+#define PCI_SLC90E66 NULL
+#define ATA66_SLC90E66 NULL
+#define INIT_SLC90E66 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_SL82C105
+extern unsigned int pci_init_sl82c105(struct pci_dev *, const char *);
+extern void dma_init_sl82c105(ide_hwif_t *, unsigned long);
+extern void ide_init_sl82c105(ide_hwif_t *);
+#define PCI_W82C105 &pci_init_sl82c105
+#define DMA_W82C105 &dma_init_sl82c105
+#define INIT_W82C105 &ide_init_sl82c105
+#else
+#define PCI_W82C105 NULL
+#define DMA_W82C105 NULL
+#define INIT_W82C105 IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_TRM290
+extern void ide_init_trm290(ide_hwif_t *);
+#define INIT_TRM290 &ide_init_trm290
+#else
+#define INIT_TRM290 IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_VIA82CXXX
+extern unsigned int pci_init_via82cxxx(struct pci_dev *, const char *);
+extern unsigned int ata66_via82cxxx(ide_hwif_t *);
+extern void ide_init_via82cxxx(ide_hwif_t *);
+extern void ide_dmacapable_via82cxxx(ide_hwif_t *, unsigned long);
+#define PCI_VIA82CXXX &pci_init_via82cxxx
+#define ATA66_VIA82CXXX &ata66_via82cxxx
+#define INIT_VIA82CXXX &ide_init_via82cxxx
+#define DMA_VIA82CXXX &ide_dmacapable_via82cxxx
+#else
+#define PCI_VIA82CXXX NULL
+#define ATA66_VIA82CXXX NULL
+#define INIT_VIA82CXXX IDE_NO_DRIVER
+#define DMA_VIA82CXXX NULL
+#endif
+
+typedef struct ide_pci_enablebit_s {
+ byte reg; /* byte pci reg holding the enable-bit */
+ byte mask; /* mask to isolate the enable-bit */
+ byte val; /* value of masked reg when "enabled" */
+} ide_pci_enablebit_t;
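+
+/*
+ * A sketch (hypothetical helper, not from this file) of how an
+ * enablebit triple is meant to be consumed when probing a channel:
+ */
+#if 0
+static int channel_enabled(struct pci_dev *dev, ide_pci_enablebit_t *e)
+{
+	byte tmp;
+
+	if (!e->reg)		/* all-zero entry: channel assumed enabled */
+		return 1;
+	pci_read_config_byte(dev, e->reg, &tmp);
+	return (tmp & e->mask) == e->val;
+}
+#endif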
+
+typedef struct ide_pci_device_s {
+ ide_pci_devid_t devid;
+ char *name;
+ unsigned int (*init_chipset)(struct pci_dev *dev, const char *name);
+ unsigned int (*ata66_check)(ide_hwif_t *hwif);
+ void (*init_hwif)(ide_hwif_t *hwif);
+ void (*dma_init)(ide_hwif_t *hwif, unsigned long dmabase);
+ ide_pci_enablebit_t enablebits[2];
+ byte bootable;
+ unsigned int extra;
+} ide_pci_device_t;
+
+static ide_pci_device_t ide_pci_chipsets[] __initdata = {
+ {DEVID_PIIXa, "PIIX", NULL, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIXb, "PIIX", NULL, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_MPIIX, "MPIIX", NULL, NULL, INIT_PIIX, NULL, {{0x6D,0x80,0x80}, {0x6F,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX3, "PIIX3", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX4, "PIIX4", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH0, "ICH0", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX4E2, "PIIX4", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH, "ICH", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX4U2, "PIIX4", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX4NX, "PIIX4", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH2, "ICH2", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH2M, "ICH2M", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH3M, "ICH3M", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH3, "ICH3", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH4, "ICH4", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_CICH, "C-ICH", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_VIA_IDE, "VIA_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_MR_IDE, "VP_IDE", PCI_VIA82CXXX, ATA66_VIA82CXXX,INIT_VIA82CXXX, DMA_VIA82CXXX, {{0x40,0x02,0x02}, {0x40,0x01,0x01}}, ON_BOARD, 0 },
+ {DEVID_VP_IDE, "VP_IDE", PCI_VIA82CXXX, ATA66_VIA82CXXX,INIT_VIA82CXXX, DMA_VIA82CXXX, {{0x40,0x02,0x02}, {0x40,0x01,0x01}}, ON_BOARD, 0 },
+#ifndef CONFIG_PDC202XX_FORCE
+ {DEVID_PDC20246,"PDC20246", PCI_PDC202XX, NULL, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 16 },
+ {DEVID_PDC20262,"PDC20262", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 },
+ {DEVID_PDC20265,"PDC20265", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 },
+ {DEVID_PDC20267,"PDC20267", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 },
+#else /* !CONFIG_PDC202XX_FORCE */
+ {DEVID_PDC20246,"PDC20246", PCI_PDC202XX, NULL, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 16 },
+ {DEVID_PDC20262,"PDC20262", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 },
+ {DEVID_PDC20265,"PDC20265", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 },
+ {DEVID_PDC20267,"PDC20267", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 },
+#endif
+ {DEVID_PDC20268,"PDC20268", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+	/* Promise used a different PCI ident for the raid card, apparently to
+	   prevent Linux from detecting it and driving it with its own raid
+	   code.  We want to detect it for the ataraid drivers, so we have to
+	   list both here. */
+ {DEVID_PDC20270,"PDC20270", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_PDC20269,"PDC20269", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_PDC20275,"PDC20275", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_PDC20276,"PDC20276", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_RZ1000, "RZ1000", NULL, NULL, INIT_RZ1000, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_RZ1001, "RZ1001", NULL, NULL, INIT_RZ1000, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_SAMURAI, "SAMURAI", NULL, NULL, INIT_SAMURAI, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CMD640, "CMD640", NULL, NULL, IDE_IGNORE, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_NS87410, "NS87410", NULL, NULL, NULL, NULL, {{0x43,0x08,0x08}, {0x47,0x08,0x08}}, ON_BOARD, 0 },
+ {DEVID_SIS5513, "SIS5513", PCI_SIS5513, ATA66_SIS5513, INIT_SIS5513, NULL, {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, ON_BOARD, 0 },
+ {DEVID_CMD643, "CMD643", PCI_CMD64X, NULL, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CMD646, "CMD646", PCI_CMD64X, NULL, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x51,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_CMD648, "CMD648", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CMD649, "CMD649", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+#ifndef CONFIG_BLK_DEV_CMD680
+ {DEVID_CMD680, "CMD680", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+#else /* CONFIG_BLK_DEV_CMD680 */
+ {DEVID_CMD680, "CMD680", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+#endif /* !CONFIG_BLK_DEV_CMD680 */
+ {DEVID_HT6565, "HT6565", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_OPTI621, "OPTI621", NULL, NULL, INIT_OPTI621, NULL, {{0x45,0x80,0x00}, {0x40,0x08,0x00}}, ON_BOARD, 0 },
+ {DEVID_OPTI621X,"OPTI621X", NULL, NULL, INIT_OPTI621, NULL, {{0x45,0x80,0x00}, {0x40,0x08,0x00}}, ON_BOARD, 0 },
+ {DEVID_TRM290, "TRM290", NULL, NULL, INIT_TRM290, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_NS87415, "NS87415", NULL, NULL, INIT_NS87415, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_AEC6210, "AEC6210", PCI_AEC62XX, NULL, INIT_AEC62XX, DMA_AEC62XX, {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, OFF_BOARD, 0 },
+ {DEVID_AEC6260, "AEC6260", PCI_AEC62XX, ATA66_AEC62XX, INIT_AEC62XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, NEVER_BOARD, 0 },
+ {DEVID_AEC6260R,"AEC6260R", PCI_AEC62XX, ATA66_AEC62XX, INIT_AEC62XX, NULL, {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, OFF_BOARD, 0 },
+ {DEVID_W82C105, "W82C105", PCI_W82C105, NULL, INIT_W82C105, DMA_W82C105, {{0x40,0x01,0x01}, {0x40,0x10,0x10}}, ON_BOARD, 0 },
+ {DEVID_UM8673F, "UM8673F", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_UM8886A, "UM8886A", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_UM8886BF,"UM8886BF", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_HPT34X, "HPT34X", PCI_HPT34X, NULL, INIT_HPT34X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, NEVER_BOARD, 16 },
+ {DEVID_HPT366, "HPT366", PCI_HPT366, ATA66_HPT366, INIT_HPT366, DMA_HPT366, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 240 },
+ {DEVID_ALI15X3, "ALI15X3", PCI_ALI15X3, ATA66_ALI15X3, INIT_ALI15X3, DMA_ALI15X3, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CY82C693,"CY82C693", PCI_CY82C693, NULL, INIT_CY82C693, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_HINT, "HINT_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CS5530, "CS5530", PCI_CS5530, NULL, INIT_CS5530, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_AMD7401, "AMD7401", NULL, NULL, NULL, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 },
+ {DEVID_AMD7409, "AMD7409", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 },
+ {DEVID_AMD7411, "AMD7411", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 },
+ {DEVID_AMD7441, "AMD7441", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 },
+ {DEVID_PDCADMA, "PDCADMA", PCI_PDCADMA, ATA66_PDCADMA, INIT_PDCADMA, DMA_PDCADMA, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_SLC90E66,"SLC90E66", PCI_SLC90E66, ATA66_SLC90E66, INIT_SLC90E66, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_OSB4, "ServerWorks OSB4", PCI_SVWKS, ATA66_SVWKS, INIT_SVWKS, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CSB5, "ServerWorks CSB5", PCI_SVWKS, ATA66_SVWKS, INIT_SVWKS, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_ITE8172G,"IT8172G", PCI_IT8172, NULL, INIT_IT8172, NULL, {{0x00,0x00,0x00}, {0x40,0x00,0x01}}, ON_BOARD, 0 },
+ {IDE_PCI_DEVID_NULL, "PCI_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }};
+
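+/*
+ * A note on the enablebits triplets in the table above ({reg,mask,val}
+ * per port): ide_setup_pci_device() reads config byte "reg", masks it
+ * with "mask" and compares the result against "val" to decide whether a
+ * port is enabled.  An all-zero triplet means "no enable bit known,
+ * assume the port is enabled".
+ */
+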
+/*
+ * This allows offboard ide-pci cards to enable a BIOS, verify interrupt
+ * settings of split-mirror pci-config space, place the chipset into
+ * init mode, and/or preserve an interrupt if the card lacks native IDE
+ * support.
+ */
+static unsigned int __init ide_special_settings (struct pci_dev *dev, const char *name)
+{
+ switch(dev->device) {
+ case PCI_DEVICE_ID_TTI_HPT366:
+ case PCI_DEVICE_ID_PROMISE_20246:
+ case PCI_DEVICE_ID_PROMISE_20262:
+ case PCI_DEVICE_ID_PROMISE_20265:
+ case PCI_DEVICE_ID_PROMISE_20267:
+ case PCI_DEVICE_ID_PROMISE_20268:
+ case PCI_DEVICE_ID_PROMISE_20270:
+ case PCI_DEVICE_ID_PROMISE_20269:
+ case PCI_DEVICE_ID_PROMISE_20275:
+ case PCI_DEVICE_ID_PROMISE_20276:
+ case PCI_DEVICE_ID_ARTOP_ATP850UF:
+ case PCI_DEVICE_ID_ARTOP_ATP860:
+ case PCI_DEVICE_ID_ARTOP_ATP860R:
+ return dev->irq;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Match a PCI IDE port against an entry in ide_hwifs[],
+ * based on io_base port if possible.
+ */
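+/*
+ * In short, the search order is: (1) a slot whose io_base was forced via
+ * ide_setup() parameters, (2) a slot whose default io_base matches and is
+ * still unclaimed, (3) any unclaimed slot -- ide2 and up first, unless
+ * the card is flagged bootable, in which case ide0/ide1 may be taken
+ * straight away.
+ */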
+static ide_hwif_t __init *ide_match_hwif (unsigned long io_base, byte bootable, const char *name)
+{
+ int h;
+ ide_hwif_t *hwif;
+
+ /*
+ * Look for a hwif with matching io_base specified using
+ * parameters to ide_setup().
+ */
+ for (h = 0; h < MAX_HWIFS; ++h) {
+ hwif = &ide_hwifs[h];
+ if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) {
+ if (hwif->chipset == ide_generic)
+ return hwif; /* a perfect match */
+ }
+ }
+ /*
+ * Look for a hwif with matching io_base default value.
+ * If chipset is "ide_unknown", then claim that hwif slot.
+ * Otherwise, some other chipset has already claimed it.. :(
+ */
+ for (h = 0; h < MAX_HWIFS; ++h) {
+ hwif = &ide_hwifs[h];
+ if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) {
+ if (hwif->chipset == ide_unknown)
+ return hwif; /* match */
+ printk("%s: port 0x%04lx already claimed by %s\n", name, io_base, hwif->name);
+ return NULL; /* already claimed */
+ }
+ }
+	/*
+	 * Okay, there is no hwif matching our io_base,
+	 * so we'll just claim an unassigned slot.
+	 * Give preference to claiming other slots before claiming ide0/ide1,
+	 * just in case there's another interface yet-to-be-scanned
+	 * which uses ports 1f0/170 (the ide0/ide1 defaults).
+	 *
+	 * The exception is a card flagged (bootable): it is allowed to
+	 * claim ide0/ide1 first, even though it does not use the standard
+	 * ports.
+	 */
+ if (bootable) {
+ for (h = 0; h < MAX_HWIFS; ++h) {
+ hwif = &ide_hwifs[h];
+ if (hwif->chipset == ide_unknown)
+ return hwif; /* pick an unused entry */
+ }
+ } else {
+ for (h = 2; h < MAX_HWIFS; ++h) {
+ hwif = ide_hwifs + h;
+ if (hwif->chipset == ide_unknown)
+ return hwif; /* pick an unused entry */
+ }
+ }
+ for (h = 0; h < 2; ++h) {
+ hwif = ide_hwifs + h;
+ if (hwif->chipset == ide_unknown)
+ return hwif; /* pick an unused entry */
+ }
+ printk("%s: too many IDE interfaces, no room in table\n", name);
+ return NULL;
+}
+
+static int __init ide_setup_pci_baseregs (struct pci_dev *dev, const char *name)
+{
+ byte reg, progif = 0;
+
+ /*
+ * Place both IDE interfaces into PCI "native" mode:
+ */
+ if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || (progif & 5) != 5) {
+ if ((progif & 0xa) != 0xa) {
+ printk("%s: device not capable of full native PCI mode\n", name);
+ return 1;
+ }
+ printk("%s: placing both ports into native PCI mode\n", name);
+ (void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5);
+ if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || (progif & 5) != 5) {
+ printk("%s: rewrite of PROGIF failed, wanted 0x%04x, got 0x%04x\n", name, progif|5, progif);
+ return 1;
+ }
+ }
+ /*
+	 * Set up base registers for IDE command/control spaces for each interface:
+ */
+ for (reg = 0; reg < 4; reg++) {
+ struct resource *res = dev->resource + reg;
+ if ((res->flags & IORESOURCE_IO) == 0)
+ continue;
+ if (!res->start) {
+ printk("%s: Missing I/O address #%d\n", name, reg);
+ return 1;
+ }
+ }
+ return 0;
+}
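+
+/*
+ * For reference: the PROGIF tests above rely on the standard PCI IDE
+ * programming-interface byte, where bits 0/2 mean the primary/secondary
+ * port is in native mode and bits 1/3 mean that mode is programmable.
+ * Hence 0x5 is "both ports native" and 0xa is "both ports switchable".
+ */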
+
+/*
+ * ide_setup_pci_device() looks at the primary/secondary interfaces
+ * on a PCI IDE device and, if they are enabled, prepares the IDE driver
+ * for use with them. This generic code works for most PCI chipsets.
+ *
+ * One thing that is not standardized is the location of the
+ * primary/secondary interface "enable/disable" bits. For chipsets that
+ * we "know" about, this information is in the ide_pci_device_t struct;
+ * for all other chipsets, we just assume both interfaces are enabled.
+ */
+static void __init ide_setup_pci_device (struct pci_dev *dev, ide_pci_device_t *d)
+{
+ unsigned int port, at_least_one_hwif_enabled = 0, autodma = 0, pciirq = 0;
+ unsigned short pcicmd = 0, tried_config = 0;
+ byte tmp = 0;
+ ide_hwif_t *hwif, *mate = NULL;
+ unsigned int class_rev;
+ static int secondpdc = 0;
+
+#ifdef CONFIG_IDEDMA_AUTO
+ if (!noautodma)
+ autodma = 1;
+#endif
+
+ if (d->init_hwif == IDE_NO_DRIVER) {
+ printk(KERN_WARNING "%s: detected chipset, but driver not compiled in!\n", d->name);
+ d->init_hwif = NULL;
+ }
+
+	if (pci_enable_device(dev)) {
+		if (pci_enable_device_bars(dev, 1<<4)) {
+			printk(KERN_WARNING "%s: (ide_setup_pci_device:) Could not enable device.\n", d->name);
+			return;
+		}
+		printk(KERN_INFO "%s: BIOS setup was incomplete.\n", d->name);
+	}
+
+check_if_enabled:
+ if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd)) {
+ printk("%s: error accessing PCI regs\n", d->name);
+ return;
+ }
+ if (!(pcicmd & PCI_COMMAND_IO)) { /* is device disabled? */
+ /*
+ * PnP BIOS was *supposed* to have set this device up for us,
+ * but we can do it ourselves, so long as the BIOS has assigned an IRQ
+ * (or possibly the device is using a "legacy header" for IRQs).
+ * Maybe the user deliberately *disabled* the device,
+ * but we'll eventually ignore it again if no drives respond.
+ */
+ if (tried_config++
+ || ide_setup_pci_baseregs(dev, d->name)
+ || pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_IO)) {
+ printk("%s: device disabled (BIOS)\n", d->name);
+ return;
+ }
+ autodma = 0; /* default DMA off if we had to configure it here */
+ goto check_if_enabled;
+ }
+ if (tried_config)
+ printk("%s: device enabled (Linux)\n", d->name);
+
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
+ class_rev &= 0xff;
+
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X)) {
+		/* see comments in hpt34x.c for why */
+ char *chipset_names[] = {"HPT343", "HPT345"};
+ strcpy(d->name, chipset_names[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0]);
+ d->bootable = (pcicmd & PCI_COMMAND_MEMORY) ? OFF_BOARD : NEVER_BOARD;
+ }
+
+ printk("%s: chipset revision %d\n", d->name, class_rev);
+
+ /*
+ * Can we trust the reported IRQ?
+ */
+ pciirq = dev->irq;
+
+	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) {
+		/* By rights we would ignore these, but the Promise FastTrak
+		 * people have some strange ideas about what is proprietary,
+		 * so we have to act otherwise on those.  The SuperTrak,
+		 * however, we do need to skip.
+		 */
+		if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265)) {
+			printk(KERN_INFO "ide: Found Promise 20265 in RAID mode.\n");
+			if (dev->bus->self &&
+			    dev->bus->self->vendor == PCI_VENDOR_ID_INTEL &&
+			    dev->bus->self->device == PCI_DEVICE_ID_INTEL_I960) {
+				printk(KERN_INFO "ide: Skipping Promise PDC20265 attached to I2O RAID controller.\n");
+				return;
+			}
+		}
+		/* It's attached to something else, just a random bridge.
+		   Suspect a FastTrak and fall through. */
+	}
+
+ if ((dev->class & ~(0xfa)) != ((PCI_CLASS_STORAGE_IDE << 8) | 5)) {
+ printk("%s: not 100%% native mode: will probe irqs later\n", d->name);
+		/*
+		 * This allows offboard ide-pci cards to enable a BIOS,
+		 * verify interrupt settings of split-mirror pci-config
+		 * space, place the chipset into init mode, and/or preserve
+		 * an interrupt if the card lacks native IDE support.
+		 */
+ pciirq = (d->init_chipset) ? d->init_chipset(dev, d->name) : ide_special_settings(dev, d->name);
+ } else if (tried_config) {
+ printk("%s: will probe irqs later\n", d->name);
+ pciirq = 0;
+ } else if (!pciirq) {
+ printk("%s: bad irq (%d): will probe later\n", d->name, pciirq);
+ pciirq = 0;
+ } else {
+ if (d->init_chipset)
+ (void) d->init_chipset(dev, d->name);
+#ifdef __sparc__
+ printk("%s: 100%% native mode on irq %s\n",
+ d->name, __irq_itoa(pciirq));
+#else
+ printk("%s: 100%% native mode on irq %d\n", d->name, pciirq);
+#endif
+ }
+
+ /*
+ * Set up the IDE ports
+ */
+ for (port = 0; port <= 1; ++port) {
+ unsigned long base = 0, ctl = 0;
+ ide_pci_enablebit_t *e = &(d->enablebits[port]);
+
+		/*
+		 * If this is a Promise FakeRaid controller, the 2nd controller will
+		 * be marked as disabled while it is actually there and enabled by
+		 * the BIOS for RAID purposes.  Skip the normal "is it enabled" test
+		 * for those.
+		 */
+ if ((IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265)) && (secondpdc++==1) && (port==1) )
+ goto controller_ok;
+ if ((IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20262)) && (secondpdc++==1) && (port==1) )
+ goto controller_ok;
+
+ if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) || (tmp & e->mask) != e->val))
+ continue; /* port not enabled */
+controller_ok:
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366) && (port) && (class_rev < 0x03))
+ return;
+ if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE || (dev->class & (port ? 4 : 1)) != 0) {
+ ctl = dev->resource[(2*port)+1].start;
+ base = dev->resource[2*port].start;
+ if (!(ctl & PCI_BASE_ADDRESS_IO_MASK) ||
+ !(base & PCI_BASE_ADDRESS_IO_MASK)) {
+ printk("%s: IO baseregs (BIOS) are reported as MEM, report to <andre@linux-ide.org>.\n", d->name);
+#if 0
+ /* FIXME! This really should check that it really gets the IO/MEM part right! */
+ continue;
+#endif
+ }
+ }
+ if ((ctl && !base) || (base && !ctl)) {
+ printk("%s: inconsistent baseregs (BIOS) for port %d, skipping\n", d->name, port);
+ continue;
+ }
+ if (!ctl)
+ ctl = port ? 0x374 : 0x3f4; /* use default value */
+ if (!base)
+ base = port ? 0x170 : 0x1f0; /* use default value */
+ if ((hwif = ide_match_hwif(base, d->bootable, d->name)) == NULL)
+ continue; /* no room in ide_hwifs[] */
+ if (hwif->io_ports[IDE_DATA_OFFSET] != base) {
+ ide_init_hwif_ports(&hwif->hw, base, (ctl | 2), NULL);
+ memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+ hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
+ }
+ hwif->chipset = ide_pci;
+ hwif->pci_dev = dev;
+ hwif->pci_devid = d->devid;
+ hwif->channel = port;
+ if (!hwif->irq)
+ hwif->irq = pciirq;
+ if (mate) {
+ hwif->mate = mate;
+ mate->mate = hwif;
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6210)) {
+ hwif->serialized = 1;
+ mate->serialized = 1;
+ }
+ }
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886A) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886BF) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8673F)) {
+ hwif->irq = hwif->channel ? 15 : 14;
+ goto bypass_umc_dma;
+ }
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_MPIIX))
+ goto bypass_piix_dma;
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDCADMA))
+ goto bypass_legacy_dma;
+ if (hwif->udma_four) {
+ printk("%s: ATA-66/100 forced bit set (WARNING)!!\n", d->name);
+ } else {
+ hwif->udma_four = (d->ata66_check) ? d->ata66_check(hwif) : 0;
+ }
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_SIS5513) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PIIX4NX) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_VIA_IDE) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_MR_IDE) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_VP_IDE))
+ autodma = 0;
+ if (autodma)
+ hwif->autodma = 1;
+
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20246) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20262) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20267) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20268) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20270) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20269) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20275) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20276) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6210) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260R) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CY82C693) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD646) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD648) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD649) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD680) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_OSB4) ||
+ ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 0x80))) {
+ unsigned long dma_base = ide_get_or_set_dma_base(hwif, (!mate && d->extra) ? d->extra : 0, d->name);
+ if (dma_base && !(pcicmd & PCI_COMMAND_MASTER)) {
+ /*
+ * Set up BM-DMA capability (PnP BIOS should have done this)
+ */
+ if (!IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530))
+ hwif->autodma = 0; /* default DMA off if we had to configure it here */
+ (void) pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_MASTER);
+ if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd) || !(pcicmd & PCI_COMMAND_MASTER)) {
+ printk("%s: %s error updating PCICMD\n", hwif->name, d->name);
+ dma_base = 0;
+ }
+ }
+ if (dma_base) {
+ if (d->dma_init) {
+ d->dma_init(hwif, dma_base);
+ } else {
+ ide_setup_dma(hwif, dma_base, 8);
+ }
+ } else {
+ printk("%s: %s Bus-Master DMA disabled (BIOS)\n", hwif->name, d->name);
+ }
+ }
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+bypass_legacy_dma:
+bypass_piix_dma:
+bypass_umc_dma:
+ if (d->init_hwif) /* Call chipset-specific routine for each enabled hwif */
+ d->init_hwif(hwif);
+ mate = hwif;
+ at_least_one_hwif_enabled = 1;
+ }
+ if (!at_least_one_hwif_enabled)
+ printk("%s: neither IDE port enabled (BIOS)\n", d->name);
+}
+
+static void __init pdc20270_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d)
+{
+ struct pci_dev *dev2 = NULL, *findev;
+ ide_pci_device_t *d2;
+
+ if ((dev->bus->self &&
+ dev->bus->self->vendor == PCI_VENDOR_ID_DEC) &&
+ (dev->bus->self->device == PCI_DEVICE_ID_DEC_21150)) {
+ if (PCI_SLOT(dev->devfn) & 2) {
+ return;
+ }
+ d->extra = 0;
+ pci_for_each_dev(findev) {
+ if ((findev->vendor == dev->vendor) &&
+ (findev->device == dev->device) &&
+ (PCI_SLOT(findev->devfn) & 2)) {
+ byte irq = 0, irq2 = 0;
+ dev2 = findev;
+ pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+ pci_read_config_byte(dev2, PCI_INTERRUPT_LINE, &irq2);
+ if (irq != irq2) {
+ dev2->irq = dev->irq;
+ pci_write_config_byte(dev2, PCI_INTERRUPT_LINE, irq);
+ }
+
+ }
+ }
+ }
+
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+ ide_setup_pci_device(dev, d);
+ if (!dev2)
+ return;
+ d2 = d;
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d2->name, dev2->bus->number, dev2->devfn);
+ ide_setup_pci_device(dev2, d2);
+}
+
+static void __init hpt366_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d)
+{
+ struct pci_dev *dev2 = NULL, *findev;
+ ide_pci_device_t *d2;
+ unsigned char pin1 = 0, pin2 = 0;
+ unsigned int class_rev;
+ char *chipset_names[] = {"HPT366", "HPT366", "HPT368", "HPT370", "HPT370A", "HPT372"};
+
+ if (PCI_FUNC(dev->devfn) & 1)
+ return;
+
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
+ class_rev &= 0xff;
+ if (class_rev > 5)
+ class_rev = 5;
+
+ strcpy(d->name, chipset_names[class_rev]);
+
+	switch(class_rev) {
+		case 4:
+		case 3:
+			printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+			ide_setup_pci_device(dev, d);
+			return;
+		default:
+			break;
+	}
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin1);
+ pci_for_each_dev(findev) {
+ if ((findev->vendor == dev->vendor) &&
+ (findev->device == dev->device) &&
+ ((findev->devfn - dev->devfn) == 1) &&
+ (PCI_FUNC(findev->devfn) & 1)) {
+ dev2 = findev;
+ pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin2);
+ hpt363_shared_pin = (pin1 != pin2) ? 1 : 0;
+ hpt363_shared_irq = (dev->irq == dev2->irq) ? 1 : 0;
+ if (hpt363_shared_pin && hpt363_shared_irq) {
+ d->bootable = ON_BOARD;
+ printk("%s: onboard version of chipset, pin1=%d pin2=%d\n", d->name, pin1, pin2);
+#if 0
+ /* I forgot why I did this once, but it fixed something. */
+ pci_write_config_byte(dev2, PCI_INTERRUPT_PIN, dev->irq);
+ printk("PCI: %s: Fixing interrupt %d pin %d to ZERO \n", d->name, dev2->irq, pin2);
+ pci_write_config_byte(dev2, PCI_INTERRUPT_LINE, 0);
+#endif
+ }
+ break;
+ }
+ }
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+ ide_setup_pci_device(dev, d);
+ if (!dev2)
+ return;
+ d2 = d;
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d2->name, dev2->bus->number, dev2->devfn);
+ ide_setup_pci_device(dev2, d2);
+}
+
+/*
+ * ide_scan_pcibus() gets invoked at boot time from ide.c.
+ * It walks the PCI device list, calling ide_scan_pcidev() on each device;
+ * recognized IDE controllers are then handed to ide_setup_pci_device().
+ */
+void __init ide_scan_pcidev (struct pci_dev *dev)
+{
+ ide_pci_devid_t devid;
+ ide_pci_device_t *d;
+
+ devid.vid = dev->vendor;
+ devid.did = dev->device;
+ for (d = ide_pci_chipsets; d->devid.vid && !IDE_PCI_DEVID_EQ(d->devid, devid); ++d);
+ if (d->init_hwif == IDE_IGNORE)
+		printk("%s: ignored by ide_scan_pcidev() (uses own driver)\n", d->name);
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_OPTI621V) && !(PCI_FUNC(dev->devfn) & 1))
+ return;
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_CY82C693) && (!(PCI_FUNC(dev->devfn) & 1) || !((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)))
+		return; /* CY82C693 is more than just an IDE controller */
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_ITE8172G) && (!(PCI_FUNC(dev->devfn) & 1) || !((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)))
+		return; /* IT8172G is likewise more than just an IDE controller */
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886A) && !(PCI_FUNC(dev->devfn) & 1))
+ return; /* UM8886A/BF pair */
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366))
+ hpt366_device_order_fixup(dev, d);
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20270))
+ pdc20270_device_order_fixup(dev, d);
+ else if (!IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL) || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE) {
+ if (IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL))
+ printk("%s: unknown IDE controller on PCI bus %02x device %02x, VID=%04x, DID=%04x\n",
+ d->name, dev->bus->number, dev->devfn, devid.vid, devid.did);
+ else
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+ ide_setup_pci_device(dev, d);
+ }
+}
+
+void __init ide_scan_pcibus (int scan_direction)
+{
+ struct pci_dev *dev;
+
+ if (!scan_direction) {
+ pci_for_each_dev(dev) {
+ ide_scan_pcidev(dev);
+ }
+ } else {
+ pci_for_each_dev_reverse(dev) {
+ ide_scan_pcidev(dev);
+ }
+ }
+}
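+
+/*
+ * A minimal sketch of the boot-time entry point, assuming the caller in
+ * ide.c passes the scan direction chosen by the "ide=reverse" option
+ * (illustrative only, hence the #if 0):
+ */
+#if 0
+extern int ide_scan_direction;		/* assumed to be kept in ide.c */
+
+static void __init example_pci_scan (void)
+{
+	/* walk the PCI bus forwards or backwards, setting up each IDE chip */
+	ide_scan_pcibus(ide_scan_direction);
+}
+#endif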
diff --git a/xen/drivers/ide/ide-probe.c b/xen/drivers/ide/ide-probe.c
new file mode 100644
index 0000000000..99f38dfcb8
--- /dev/null
+++ b/xen/drivers/ide/ide-probe.c
@@ -0,0 +1,1066 @@
+/*
+ * linux/drivers/ide/ide-probe.c Version 1.07 March 18, 2001
+ *
+ * Copyright (C) 1994-1998 Linus Torvalds & authors (see below)
+ */
+
+/*
+ * Mostly written by Mark Lord <mlord@pobox.com>
+ * and Gadi Oxman <gadio@netvision.net.il>
+ * and Andre Hedrick <andre@linux-ide.org>
+ *
+ * See linux/MAINTAINERS for address of current maintainer.
+ *
+ * This is the IDE probe module, as evolved from hd.c and ide.c.
+ *
+ * Version 1.00 move drive probing code from ide.c to ide-probe.c
+ * Version 1.01 fix compilation problem for m68k
+ * Version 1.02 increase WAIT_PIDENTIFY to avoid CD-ROM locking at boot
+ * by Andrea Arcangeli
+ * Version 1.03 fix for (hwif->chipset == ide_4drives)
+ * Version 1.04 fixed buggy treatments of known flash memory cards
+ *
+ * Version 1.05 fix for (hwif->chipset == ide_pdc4030)
+ * added ide6/7/8/9
+ * allowed for secondary flash card to be detectable
+ * with new flag : drive->ata_flash : 1;
+ * Version 1.06 stream line request queue and prep for cascade project.
+ * Version 1.07 max_sect <= 255; slower disks would get behind and
+ * then fall over when they get to 256. Paul G.
+ */
+
+#undef REALLY_SLOW_IO /* most systems can safely undef this */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/slab.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+#include <xeno/spinlock.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#define IDE_PROBE_TRACE 0
+
+static inline void do_identify (ide_drive_t *drive, byte cmd)
+{
+ int bswap = 1;
+ struct hd_driveid *id;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::do_identify\n");
+ }
+
+ id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_ATOMIC); /* called with interrupts disabled! */
+ if (!id) {
+ printk(KERN_WARNING "(ide-probe::do_identify) Out of memory.\n");
+ goto err_kmalloc;
+ }
+
+ ide_input_data(drive, id, SECTOR_WORDS); /* read 512 bytes of id info */
+ ide__sti(); /* local CPU only */
+ ide_fix_driveid(id);
+
+ if (id->word156 == 0x4d42) {
+ printk("%s: drive->id->word156 == 0x%04x \n", drive->name, drive->id->word156);
+ }
+
+ if (!drive->forced_lun)
+ drive->last_lun = id->last_lun & 0x7;
+#if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA)
+ /*
+ * EATA SCSI controllers do a hardware ATA emulation:
+ * Ignore them if there is a driver for them available.
+ */
+ if ((id->model[0] == 'P' && id->model[1] == 'M')
+ || (id->model[0] == 'S' && id->model[1] == 'K')) {
+ printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model);
+ goto err_misc;
+ }
+#endif /* CONFIG_SCSI_EATA_DMA || CONFIG_SCSI_EATA_PIO || CONFIG_SCSI_EATA */
+
+ /*
+ * WIN_IDENTIFY returns little-endian info,
+ * WIN_PIDENTIFY *usually* returns little-endian info.
+ */
+ if (cmd == WIN_PIDENTIFY) {
+ if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */
+ || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */
+ || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */
+ bswap ^= 1; /* Vertos drives may still be weird */
+ }
+ ide_fixstring (id->model, sizeof(id->model), bswap);
+ ide_fixstring (id->fw_rev, sizeof(id->fw_rev), bswap);
+ ide_fixstring (id->serial_no, sizeof(id->serial_no), bswap);
+
+ if (strstr(id->model, "E X A B Y T E N E S T"))
+ goto err_misc;
+
+ id->model[sizeof(id->model)-1] = '\0'; /* we depend on this a lot! */
+ printk("%s: %s, ", drive->name, id->model);
+ drive->present = 1;
+
+ /*
+ * Check for an ATAPI device
+ */
+ if (cmd == WIN_PIDENTIFY) {
+ byte type = (id->config >> 8) & 0x1f;
+ printk("ATAPI ");
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (HWIF(drive)->channel == 1 && HWIF(drive)->chipset == ide_pdc4030) {
+ printk(" -- not supported on 2nd Promise port\n");
+ goto err_misc;
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ switch (type) {
+ case ide_floppy:
+ if (!strstr(id->model, "CD-ROM")) {
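+				/* "poyp" is "oppy" with each 16-bit word byte-swapped */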
+ if (!strstr(id->model, "oppy") && !strstr(id->model, "poyp") && !strstr(id->model, "ZIP"))
+					printk("cdrom or floppy? assuming ");
+ if (drive->media != ide_cdrom) {
+ printk ("FLOPPY");
+ break;
+ }
+ }
+ type = ide_cdrom; /* Early cdrom models used zero */
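+			/* fall through */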
+ case ide_cdrom:
+ drive->removable = 1;
+#ifdef CONFIG_PPC
+ /* kludge for Apple PowerBook internal zip */
+ if (!strstr(id->model, "CD-ROM") && strstr(id->model, "ZIP")) {
+ printk ("FLOPPY");
+ type = ide_floppy;
+ break;
+ }
+#endif
+ printk ("CD/DVD-ROM");
+ break;
+ case ide_tape:
+ printk ("TAPE");
+ break;
+ case ide_optical:
+ printk ("OPTICAL");
+ drive->removable = 1;
+ break;
+ default:
+ printk("UNKNOWN (type %d)", type);
+ break;
+ }
+ printk (" drive\n");
+ drive->media = type;
+ return;
+ }
+
+ /*
+ * Not an ATAPI device: looks like a "regular" hard disk
+ */
+ if (id->config & (1<<7))
+ drive->removable = 1;
+ /*
+	 * Prevent long system lockup probing later for non-existent
+ * slave drive if the hwif is actually a flash memory card of some variety:
+ */
+ if (drive_is_flashcard(drive)) {
+ ide_drive_t *mate = &HWIF(drive)->drives[1^drive->select.b.unit];
+ if (!mate->ata_flash) {
+ mate->present = 0;
+ mate->noprobe = 1;
+ }
+ }
+ drive->media = ide_disk;
+ printk("ATA DISK drive\n");
+ QUIRK_LIST(HWIF(drive),drive);
+ return;
+
+err_misc:
+ kfree(id);
+err_kmalloc:
+ drive->present = 0;
+ return;
+}
+
+/*
+ * try_to_identify() sends an ATA(PI) IDENTIFY request to a drive
+ * and waits for a response. It also monitors irqs while this is
+ * happening, in the hope of automatically determining which one is
+ * being used by the interface.
+ *
+ * Returns: 0 device was identified
+ * 1 device timed-out (no response to identify request)
+ * 2 device aborted the command (refused to identify itself)
+ */
+static int actual_try_to_identify (ide_drive_t *drive, byte cmd)
+{
+ int rc;
+ ide_ioreg_t hd_status;
+ unsigned long timeout;
+ byte s, a;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::actual_try_to_identify\n");
+ }
+
+ if (IDE_CONTROL_REG) {
+ /* take a deep breath */
+ ide_delay_50ms();
+ a = IN_BYTE(IDE_ALTSTATUS_REG);
+ s = IN_BYTE(IDE_STATUS_REG);
+ if ((a ^ s) & ~INDEX_STAT) {
+ printk("%s: probing with STATUS(0x%02x) instead of ALTSTATUS(0x%02x)\n", drive->name, s, a);
+ hd_status = IDE_STATUS_REG; /* ancient Seagate drives, broken interfaces */
+ } else {
+ hd_status = IDE_ALTSTATUS_REG; /* use non-intrusive polling */
+ }
+ } else {
+ ide_delay_50ms();
+ hd_status = IDE_STATUS_REG;
+ }
+
+ /* set features register for atapi identify command to be sure of reply */
+	if (cmd == WIN_PIDENTIFY)
+ OUT_BYTE(0,IDE_FEATURE_REG); /* disable dma & overlap */
+
+#if CONFIG_BLK_DEV_PDC4030
+ if (HWIF(drive)->chipset == ide_pdc4030) {
+ /* DC4030 hosted drives need their own identify... */
+ extern int pdc4030_identify(ide_drive_t *);
+ if (pdc4030_identify(drive)) {
+ return 1;
+ }
+ } else
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ OUT_BYTE(cmd,IDE_COMMAND_REG); /* ask drive for ID */
+ timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
+ timeout += jiffies;
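+	/* the signed comparison below stays correct across a jiffies wraparound */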
+ do {
+ if (0 < (signed long)(jiffies - timeout)) {
+ return 1; /* drive timed-out */
+ }
+ ide_delay_50ms(); /* give drive a breather */
+ } while (IN_BYTE(hd_status) & BUSY_STAT);
+
+ ide_delay_50ms(); /* wait for IRQ and DRQ_STAT */
+ if (OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only; some systems need this */
+ do_identify(drive, cmd); /* drive returned ID */
+ rc = 0; /* drive responded with ID */
+ (void) GET_STAT(); /* clear drive IRQ */
+ __restore_flags(flags); /* local CPU only */
+ } else
+ rc = 2; /* drive refused ID */
+ return rc;
+}
+
+static int try_to_identify (ide_drive_t *drive, byte cmd)
+{
+ int retval;
+ int autoprobe = 0;
+ unsigned long cookie = 0;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::try_to_identify\n");
+ }
+
+ if (IDE_CONTROL_REG && !HWIF(drive)->irq) {
+ autoprobe = 1;
+ cookie = probe_irq_on();
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* enable device irq */
+ }
+
+ retval = actual_try_to_identify(drive, cmd);
+
+ if (autoprobe) {
+ int irq;
+ OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* mask device irq */
+ (void) GET_STAT(); /* clear drive IRQ */
+ udelay(5);
+ irq = probe_irq_off(cookie);
+ if (!HWIF(drive)->irq) {
+ if (irq > 0) {
+ HWIF(drive)->irq = irq;
+ } else { /* Mmmm.. multiple IRQs.. don't know which was ours */
+ printk("%s: IRQ probe failed (0x%lx)\n", drive->name, cookie);
+#ifdef CONFIG_BLK_DEV_CMD640
+#ifdef CMD640_DUMP_REGS
+ if (HWIF(drive)->chipset == ide_cmd640) {
+ printk("%s: Hmmm.. probably a driver problem.\n", drive->name);
+ CMD640_DUMP_REGS;
+ }
+#endif /* CMD640_DUMP_REGS */
+#endif /* CONFIG_BLK_DEV_CMD640 */
+ }
+ }
+ }
+ return retval;
+}
+
+
+/*
+ * do_probe() has the difficult job of finding a drive if it exists,
+ * without getting hung up if it doesn't exist, without trampling on
+ * ethernet cards, and without leaving any IRQs dangling to haunt us later.
+ *
+ * If a drive is "known" to exist (from CMOS or kernel parameters),
+ * but does not respond right away, the probe will "hang in there"
+ * for the maximum wait time (about 30 seconds), otherwise it will
+ * exit much more quickly.
+ *
+ * Returns: 0 device was identified
+ * 1 device timed-out (no response to identify request)
+ * 2 device aborted the command (refused to identify itself)
+ * 3 bad status from device (possible for ATAPI drives)
+ * 4 probe was not attempted because failure was obvious
+ */
+static int do_probe (ide_drive_t *drive, byte cmd)
+{
+ int rc;
+ ide_hwif_t *hwif = HWIF(drive);
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::do_probe\n");
+ }
+
+ if (drive->present) { /* avoid waiting for inappropriate probes */
+ if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY))
+ return 4;
+ }
+#ifdef DEBUG
+ printk("probing for %s: present=%d, media=%d, probetype=%s\n",
+ drive->name, drive->present, drive->media,
+ (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI");
+#endif
+ ide_delay_50ms(); /* needed for some systems (e.g. crw9624 as drive0 with disk as slave) */
+ SELECT_DRIVE(hwif,drive);
+ ide_delay_50ms();
+ if (IN_BYTE(IDE_SELECT_REG) != drive->select.all && !drive->present) {
+ if (drive->select.b.unit != 0) {
+ SELECT_DRIVE(hwif,&hwif->drives[0]); /* exit with drive0 selected */
+ ide_delay_50ms(); /* allow BUSY_STAT to assert & clear */
+ }
+ return 3; /* no i/f present: mmm.. this should be a 4 -ml */
+ }
+
+ if (OK_STAT(GET_STAT(),READY_STAT,BUSY_STAT)
+ || drive->present || cmd == WIN_PIDENTIFY)
+ {
+ if ((rc = try_to_identify(drive,cmd))) /* send cmd and wait */
+ rc = try_to_identify(drive,cmd); /* failed: try again */
+ if (rc == 1 && cmd == WIN_PIDENTIFY && drive->autotune != 2) {
+ unsigned long timeout;
+ printk("%s: no response (status = 0x%02x), resetting drive\n", drive->name, GET_STAT());
+ ide_delay_50ms();
+ OUT_BYTE (drive->select.all, IDE_SELECT_REG);
+ ide_delay_50ms();
+ OUT_BYTE(WIN_SRST, IDE_COMMAND_REG);
+ timeout = jiffies;
+ while ((GET_STAT() & BUSY_STAT) && time_before(jiffies, timeout + WAIT_WORSTCASE))
+ ide_delay_50ms();
+ rc = try_to_identify(drive, cmd);
+ }
+ if (rc == 1)
+ printk("%s: no response (status = 0x%02x)\n", drive->name, GET_STAT());
+ (void) GET_STAT(); /* ensure drive irq is clear */
+ } else {
+ rc = 3; /* not present or maybe ATAPI */
+ }
+ if (drive->select.b.unit != 0) {
+ SELECT_DRIVE(hwif,&hwif->drives[0]); /* exit with drive0 selected */
+ ide_delay_50ms();
+ (void) GET_STAT(); /* ensure drive irq is clear */
+ }
+ return rc;
+}
+
+/*
+ * enable_nest() sends the EXABYTE_ENABLE_NEST command to wake up a drive
+ * hosted behind an Exabyte Nest, then re-probes it as a normal device.
+ */
+static void enable_nest (ide_drive_t *drive)
+{
+ unsigned long timeout;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::enable_nest\n");
+ }
+
+ printk("%s: enabling %s -- ", HWIF(drive)->name, drive->id->model);
+ SELECT_DRIVE(HWIF(drive), drive);
+ ide_delay_50ms();
+ OUT_BYTE(EXABYTE_ENABLE_NEST, IDE_COMMAND_REG);
+ timeout = jiffies + WAIT_WORSTCASE;
+ do {
+ if (time_after(jiffies, timeout)) {
+ printk("failed (timeout)\n");
+ return;
+ }
+ ide_delay_50ms();
+ } while (GET_STAT() & BUSY_STAT);
+ ide_delay_50ms();
+ if (!OK_STAT(GET_STAT(), 0, BAD_STAT))
+ printk("failed (status = 0x%02x)\n", GET_STAT());
+ else
+ printk("success\n");
+ if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */
+ (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */
+ }
+}
+
+/*
+ * probe_for_drive() tests for existence of a given drive using do_probe().
+ *
+ * Returns: 0 no device was found
+ * 1 device was found (note: drive->present might still be 0)
+ */
+static inline byte probe_for_drive (ide_drive_t *drive)
+{
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::probe_for_drive\n");
+ }
+
+ if (drive->noprobe) /* skip probing? */
+ return drive->present;
+ if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */
+ (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */
+ }
+ if (drive->id && strstr(drive->id->model, "E X A B Y T E N E S T"))
+ enable_nest(drive);
+ if (!drive->present)
+ return 0; /* drive not found */
+ if (drive->id == NULL) { /* identification failed? */
+ if (drive->media == ide_disk) {
+ printk ("%s: non-IDE drive, CHS=%d/%d/%d\n",
+ drive->name, drive->cyl, drive->head, drive->sect);
+ } else if (drive->media == ide_cdrom) {
+ printk("%s: ATAPI cdrom (?)\n", drive->name);
+ } else {
+ drive->present = 0; /* nuke it */
+ }
+ }
+ return 1; /* drive was found */
+}
+
+/*
+ * Calculate the region that this interface occupies,
+ * handling interfaces where the registers may not be
+ * ordered sanely. We deal with the CONTROL register
+ * separately.
+ */
+static int hwif_check_regions (ide_hwif_t *hwif)
+{
+ int region_errors = 0;
+
+ hwif->straight8 = 0;
+ region_errors = ide_check_region(hwif->io_ports[IDE_DATA_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_ERROR_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_LCYL_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_HCYL_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_SELECT_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_STATUS_OFFSET], 1);
+
+ if (hwif->io_ports[IDE_CONTROL_OFFSET])
+ region_errors += ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1);
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+ if (hwif->io_ports[IDE_IRQ_OFFSET])
+ region_errors += ide_check_region(hwif->io_ports[IDE_IRQ_OFFSET], 1);
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+ /*
+	 * If any errors are returned, we drop the hwif interface.
+ */
+ return(region_errors);
+}
+
+static void hwif_register (ide_hwif_t *hwif)
+{
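+	/*
+	 * If the data..status ports form one contiguous 8-port block, claim
+	 * it as a single region instead of eight 1-port regions.
+	 */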
+ if (((unsigned long)hwif->io_ports[IDE_DATA_OFFSET] | 7) ==
+ ((unsigned long)hwif->io_ports[IDE_STATUS_OFFSET])) {
+ ide_request_region(hwif->io_ports[IDE_DATA_OFFSET], 8, hwif->name);
+ hwif->straight8 = 1;
+ goto jump_straight8;
+ }
+
+ if (hwif->io_ports[IDE_DATA_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_DATA_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_ERROR_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_ERROR_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_NSECTOR_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_SECTOR_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_LCYL_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_LCYL_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_HCYL_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_HCYL_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_SELECT_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_SELECT_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_STATUS_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_STATUS_OFFSET], 1, hwif->name);
+
+jump_straight8:
+ if (hwif->io_ports[IDE_CONTROL_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1, hwif->name);
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+ if (hwif->io_ports[IDE_IRQ_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_IRQ_OFFSET], 1, hwif->name);
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+}
+
+/*
+ * This routine only knows how to look for drive units 0 and 1
+ * on an interface, so any setting of MAX_DRIVES > 2 won't work here.
+ */
+static void probe_hwif (ide_hwif_t *hwif)
+{
+ unsigned int unit;
+ unsigned long flags;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::probe_hwif\n");
+ }
+
+ if (hwif->noprobe)
+ return;
+#ifdef CONFIG_BLK_DEV_IDE
+ if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) {
+ extern void probe_cmos_for_drives(ide_hwif_t *);
+
+ probe_cmos_for_drives (hwif);
+ }
+#endif
+
+ if ((hwif->chipset != ide_4drives || !hwif->mate->present) &&
+#if CONFIG_BLK_DEV_PDC4030
+ (hwif->chipset != ide_pdc4030 || hwif->channel == 0) &&
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ (hwif_check_regions(hwif))) {
+ int msgout = 0;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ if (drive->present) {
+ drive->present = 0;
+ printk("%s: ERROR, PORTS ALREADY IN USE\n", drive->name);
+ msgout = 1;
+ }
+ }
+ if (!msgout)
+ printk("%s: ports already in use, skipping probe\n", hwif->name);
+ return;
+ }
+
+ __save_flags(flags); /* local CPU only */
+ __sti(); /* local CPU only; needed for jiffies and irq probing */
+ /*
+ * Second drive should only exist if first drive was found,
+ * but a lot of cdrom drives are configured as single slaves.
+ */
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ (void) probe_for_drive (drive);
+ if (drive->present && !hwif->present) {
+ hwif->present = 1;
+ if (hwif->chipset != ide_4drives || !hwif->mate->present) {
+ hwif_register(hwif);
+ }
+ }
+ }
+ if (hwif->io_ports[IDE_CONTROL_OFFSET] && hwif->reset) {
+ unsigned long timeout = jiffies + WAIT_WORSTCASE;
+ byte stat;
+
+ printk("%s: reset\n", hwif->name);
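+		/* pulse SRST in the device control register, then poll for BUSY to clear */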
+ OUT_BYTE(12, hwif->io_ports[IDE_CONTROL_OFFSET]);
+ udelay(10);
+ OUT_BYTE(8, hwif->io_ports[IDE_CONTROL_OFFSET]);
+ do {
+ ide_delay_50ms();
+ stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+ } while ((stat & BUSY_STAT) && 0 < (signed long)(timeout - jiffies));
+
+ }
+ __restore_flags(flags); /* local CPU only */
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ if (drive->present) {
+ ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc;
+ if (tuneproc != NULL && drive->autotune == 1)
+ tuneproc(drive, 255); /* auto-tune PIO mode */
+ }
+ }
+}
+
+#if MAX_HWIFS > 1
+/*
+ * save_match() is used to simplify logic in init_irq() below.
+ *
+ * hwif's irq until after that hwif is actually probed/initialized.
+ * This could be a problem for the case where an hwif is on a
+ * dual interface that requires serialization (e.g. cmd640) and another
+ * dual interface that requires serialization (eg. cmd640) and another
+ * hwif using one of the same irqs is initialized beforehand.
+ *
+ * This routine detects and reports such situations, but does not fix them.
+ */
+static void save_match (ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match)
+{
+ ide_hwif_t *m = *match;
+
+ if (m && m->hwgroup && m->hwgroup != new->hwgroup) {
+ if (!new->hwgroup)
+ return;
+ printk("%s: potential irq problem with %s and %s\n", hwif->name, new->name, m->name);
+ }
+ if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */
+ *match = new;
+}
+#endif /* MAX_HWIFS > 1 */
+
+/*
+ * init request queue
+ */
+static void ide_init_queue(ide_drive_t *drive)
+{
+ request_queue_t *q = &drive->queue;
+
+ q->queuedata = HWGROUP(drive);
+ blk_init_queue(q, do_ide_request);
+
+ if (drive->media == ide_disk) {
+#ifdef CONFIG_BLK_DEV_ELEVATOR_NOOP
+ elevator_init(&q->elevator, ELEVATOR_NOOP);
+#endif
+ }
+}
+
+/*
+ * This routine sets up the irq for an ide interface, and creates a new
+ * hwgroup for the irq/hwif if none was previously assigned.
+ *
+ * Much of the code is for correctly detecting/handling irq sharing
+ * and irq serialization situations. This is somewhat complex because
+ * it handles static as well as dynamic (PCMCIA) IDE interfaces.
+ *
+ * The SA_INTERRUPT in sa_flags means ide_intr() is always entered with
+ * interrupts completely disabled. This can be bad for interrupt latency,
+ * but anything else has led to problems on some machines. We re-enable
+ * interrupts as much as we can safely do in most places.
+ */
+static int init_irq (ide_hwif_t *hwif)
+{
+ unsigned long flags;
+ unsigned int index;
+ ide_hwgroup_t *hwgroup, *new_hwgroup;
+ ide_hwif_t *match = NULL;
+
+
+	/* Allocate the hwgroup buffer first: GFP_KERNEL may sleep, and we
+	   must not sleep once interrupts are disabled below. */
+
+ new_hwgroup = kmalloc(sizeof(ide_hwgroup_t),GFP_KERNEL);
+
+ save_flags(flags); /* all CPUs */
+ cli(); /* all CPUs */
+
+ hwif->hwgroup = NULL;
+#if MAX_HWIFS > 1
+ /*
+ * Group up with any other hwifs that share our irq(s).
+ */
+ for (index = 0; index < MAX_HWIFS; index++) {
+ ide_hwif_t *h = &ide_hwifs[index];
+ if (h->hwgroup) { /* scan only initialized hwif's */
+ if (hwif->irq == h->irq) {
+ hwif->sharing_irq = h->sharing_irq = 1;
+ if (hwif->chipset != ide_pci || h->chipset != ide_pci) {
+ save_match(hwif, h, &match);
+ }
+ }
+ if (hwif->serialized) {
+ if (hwif->mate && hwif->mate->irq == h->irq)
+ save_match(hwif, h, &match);
+ }
+ if (h->serialized) {
+ if (h->mate && hwif->irq == h->mate->irq)
+ save_match(hwif, h, &match);
+ }
+ }
+ }
+#endif /* MAX_HWIFS > 1 */
+	/*
+	 * If we matched an existing hwgroup, join it; otherwise form a new one
+	 */
+ if (match) {
+ hwgroup = match->hwgroup;
+ if(new_hwgroup)
+ kfree(new_hwgroup);
+ } else {
+ hwgroup = new_hwgroup;
+ if (!hwgroup) {
+ restore_flags(flags); /* all CPUs */
+ return 1;
+ }
+ memset(hwgroup, 0, sizeof(ide_hwgroup_t));
+ hwgroup->hwif = hwif->next = hwif;
+ hwgroup->rq = NULL;
+ hwgroup->handler = NULL;
+ hwgroup->drive = NULL;
+ hwgroup->busy = 0;
+ init_timer(&hwgroup->timer);
+ hwgroup->timer.function = &ide_timer_expiry;
+ hwgroup->timer.data = (unsigned long) hwgroup;
+ }
+
+ /*
+ * Allocate the irq, if not already obtained for another hwif
+ */
+ if (!match || match->irq != hwif->irq) {
+#ifdef CONFIG_IDEPCI_SHARE_IRQ
+ int sa = IDE_CHIPSET_IS_PCI(hwif->chipset) ? SA_SHIRQ : SA_INTERRUPT;
+#else /* !CONFIG_IDEPCI_SHARE_IRQ */
+ int sa = IDE_CHIPSET_IS_PCI(hwif->chipset) ? SA_INTERRUPT|SA_SHIRQ : SA_INTERRUPT;
+#endif /* CONFIG_IDEPCI_SHARE_IRQ */
+
+ if (hwif->io_ports[IDE_CONTROL_OFFSET])
+ OUT_BYTE(0x08, hwif->io_ports[IDE_CONTROL_OFFSET]); /* clear nIEN */
+
+ if (ide_request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwgroup)) {
+ if (!match)
+ kfree(hwgroup);
+ restore_flags(flags); /* all CPUs */
+ return 1;
+ }
+ }
+
+ /*
+ * Everything is okay, so link us into the hwgroup
+ */
+ hwif->hwgroup = hwgroup;
+ hwif->next = hwgroup->hwif->next;
+ hwgroup->hwif->next = hwif;
+
+ for (index = 0; index < MAX_DRIVES; ++index) {
+ ide_drive_t *drive = &hwif->drives[index];
+ if (!drive->present)
+ continue;
+ if (!hwgroup->drive)
+ hwgroup->drive = drive;
+ drive->next = hwgroup->drive->next;
+ hwgroup->drive->next = drive;
+ ide_init_queue(drive);
+ }
+ if (!hwgroup->hwif) {
+ hwgroup->hwif = HWIF(hwgroup->drive);
+#ifdef DEBUG
+ printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name);
+#endif
+ }
+ restore_flags(flags); /* all CPUs; safe now that hwif->hwgroup is set up */
+
+#if !defined(__mc68000__) && !defined(CONFIG_APUS) && !defined(__sparc__)
+ printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name,
+ hwif->io_ports[IDE_DATA_OFFSET],
+ hwif->io_ports[IDE_DATA_OFFSET]+7,
+ hwif->io_ports[IDE_CONTROL_OFFSET], hwif->irq);
+#elif defined(__sparc__)
+ printk("%s at 0x%03lx-0x%03lx,0x%03lx on irq %s", hwif->name,
+ hwif->io_ports[IDE_DATA_OFFSET],
+ hwif->io_ports[IDE_DATA_OFFSET]+7,
+ hwif->io_ports[IDE_CONTROL_OFFSET], __irq_itoa(hwif->irq));
+#else
+ printk("%s at %p on irq 0x%08x", hwif->name,
+ hwif->io_ports[IDE_DATA_OFFSET], hwif->irq);
+#endif /* !__mc68000__ && !CONFIG_APUS && !__sparc__ */
+ if (match)
+ printk(" (%sed with %s)",
+ hwif->sharing_irq ? "shar" : "serializ", match->name);
+ printk("\n");
+ return 0;
+}
+
+/*
+ * init_gendisk() (as opposed to ide_geninit) is called for each major device,
+ * after probing for drives, to allocate partition tables and other data
+ * structures needed for the routines in genhd.c. ide_geninit() gets called
+ * somewhat later, during the partition check.
+ */
+static void init_gendisk (ide_hwif_t *hwif)
+{
+ struct gendisk *gd;
+ unsigned int unit, units, minors;
+ int *bs, *max_sect; /* , *max_ra; */
+#ifdef DEVFS_MUST_DIE
+ extern devfs_handle_t ide_devfs_handle;
+#endif
+
+#if 1
+ units = MAX_DRIVES;
+#else
+ /* figure out maximum drive number on the interface */
+ for (units = MAX_DRIVES; units > 0; --units) {
+ if (hwif->drives[units-1].present)
+ break;
+ }
+#endif
+
+ minors = units * (1<<PARTN_BITS);
+ gd = kmalloc (sizeof(struct gendisk), GFP_KERNEL);
+ if (!gd)
+ goto err_kmalloc_gd;
+ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL);
+ if (!gd->sizes)
+ goto err_kmalloc_gd_sizes;
+ gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL);
+ if (!gd->part)
+ goto err_kmalloc_gd_part;
+ bs = kmalloc (minors*sizeof(int), GFP_KERNEL);
+ if (!bs)
+ goto err_kmalloc_bs;
+ max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL);
+ if (!max_sect)
+ goto err_kmalloc_max_sect;
+#if 0
+ max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL);
+ if (!max_ra)
+ goto err_kmalloc_max_ra;
+#endif
+
+ memset(gd->part, 0, minors * sizeof(struct hd_struct));
+
+ /* cdroms and msdos f/s are examples of non-1024 blocksizes */
+ blksize_size[hwif->major] = bs;
+ max_sectors[hwif->major] = max_sect;
+ /*max_readahead[hwif->major] = max_ra;*/
+ for (unit = 0; unit < minors; ++unit) {
+ *bs++ = BLOCK_SIZE;
+		/*
+		 * IDE can do up to 128K (256 sectors) per request; we cap it
+		 * at 128 sectors here (127 for the pdc4030)
+		 */
+ *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 128);
+ /* *max_ra++ = vm_max_readahead; */
+ }
+
+ for (unit = 0; unit < units; ++unit)
+ hwif->drives[unit].part = &gd->part[unit << PARTN_BITS];
+
+ gd->major = hwif->major; /* our major device number */
+ gd->major_name = IDE_MAJOR_NAME; /* treated special in genhd.c */
+ gd->minor_shift = PARTN_BITS; /* num bits for partitions */
+ gd->max_p = 1<<PARTN_BITS; /* 1 + max partitions / drive */
+ gd->nr_real = units; /* current num real drives */
+ gd->real_devices= hwif; /* ptr to internal data */
+ gd->next = NULL; /* linked list of major devs */
+ gd->fops = ide_fops; /* file operations */
+ gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL);
+ if (gd->flags)
+ memset (gd->flags, 0, sizeof *gd->flags * units);
+#ifdef DEVFS_MUST_DIE
+ gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL);
+ if (gd->de_arr)
+ memset (gd->de_arr, 0, sizeof *gd->de_arr * units);
+#endif
+
+ hwif->gd = gd;
+ add_gendisk(gd);
+
+ for (unit = 0; unit < units; ++unit) {
+#if 1
+ char name[64];
+ ide_add_generic_settings(hwif->drives + unit);
+ hwif->drives[unit].dn = ((hwif->channel ? 2 : 0) + unit);
+ sprintf (name, "host%d/bus%d/target%d/lun%d",
+ (hwif->channel && hwif->mate) ?
+ hwif->mate->index : hwif->index,
+ hwif->channel, unit, hwif->drives[unit].lun);
+#ifdef DEVFS_MUST_DIE
+ if (hwif->drives[unit].present)
+ hwif->drives[unit].de = devfs_mk_dir(ide_devfs_handle, name, NULL);
+#endif
+#else
+ if (hwif->drives[unit].present) {
+ char name[64];
+
+ ide_add_generic_settings(hwif->drives + unit);
+ hwif->drives[unit].dn = ((hwif->channel ? 2 : 0) + unit);
+ sprintf (name, "host%d/bus%d/target%d/lun%d",
+ (hwif->channel && hwif->mate) ? hwif->mate->index : hwif->index,
+ hwif->channel, unit, hwif->drives[unit].lun);
+ hwif->drives[unit].de =
+ devfs_mk_dir (ide_devfs_handle, name, NULL);
+ }
+#endif
+ }
+ return;
+
+#if 0
+err_kmalloc_max_ra:
+ kfree(max_sect);
+#endif
+err_kmalloc_max_sect:
+ kfree(bs);
+err_kmalloc_bs:
+ kfree(gd->part);
+err_kmalloc_gd_part:
+ kfree(gd->sizes);
+err_kmalloc_gd_sizes:
+ kfree(gd);
+err_kmalloc_gd:
+ printk(KERN_WARNING "(ide::init_gendisk) Out of memory\n");
+ return;
+}
+
+static int hwif_init (ide_hwif_t *hwif)
+{
+ if (!hwif->present)
+ return 0;
+ if (!hwif->irq) {
+ if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET])))
+ {
+ printk("%s: DISABLED, NO IRQ\n", hwif->name);
+ return (hwif->present = 0);
+ }
+ }
+#ifdef CONFIG_BLK_DEV_HD
+ if (hwif->irq == HD_IRQ && hwif->io_ports[IDE_DATA_OFFSET] != HD_DATA) {
+ printk("%s: CANNOT SHARE IRQ WITH OLD HARDDISK DRIVER (hd.c)\n", hwif->name);
+ return (hwif->present = 0);
+ }
+#endif /* CONFIG_BLK_DEV_HD */
+
+ hwif->present = 0; /* we set it back to 1 if all is ok below */
+
+#ifdef DEVFS_MUST_DIE
+ if (devfs_register_blkdev (hwif->major, hwif->name, ide_fops)) {
+ printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major);
+ return (hwif->present = 0);
+ }
+#endif
+
+ if (init_irq(hwif)) {
+ int i = hwif->irq;
+ /*
+ * It failed to initialise. Find the default IRQ for
+ * this port and try that.
+ */
+ if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET]))) {
+			printk("%s: Disabled, unable to get IRQ %d.\n", hwif->name, i);
+ (void) unregister_blkdev (hwif->major, hwif->name);
+ return (hwif->present = 0);
+ }
+ if (init_irq(hwif)) {
+ printk("%s: probed IRQ %d and default IRQ %d failed.\n",
+ hwif->name, i, hwif->irq);
+ (void) unregister_blkdev (hwif->major, hwif->name);
+ return (hwif->present = 0);
+ }
+		printk("%s: probed IRQ %d failed, using default IRQ %d.\n",
+			hwif->name, i, hwif->irq);
+ }
+
+ init_gendisk(hwif);
+ blk_dev[hwif->major].data = hwif;
+ blk_dev[hwif->major].queue = ide_get_queue;
+#if 0
+ read_ahead[hwif->major] = 8; /* (4kB) */
+#endif
+ hwif->present = 1; /* success */
+
+#if (DEBUG_SPINLOCK > 0)
+{
+ static int done = 0;
+ if (!done++)
+ printk("io_request_lock is %p\n", &io_request_lock); /* FIXME */
+}
+#endif
+ return hwif->present;
+}
+
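+/*
+ * Wrappers so that the static helpers above can be called from the rest
+ * of the (Xen) build.
+ */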
+void export_ide_init_queue (ide_drive_t *drive)
+{
+ ide_init_queue(drive);
+}
+
+byte export_probe_for_drive (ide_drive_t *drive)
+{
+ return probe_for_drive(drive);
+}
+
+EXPORT_SYMBOL(export_ide_init_queue);
+EXPORT_SYMBOL(export_probe_for_drive);
+
+int ideprobe_init (void);
+static ide_module_t ideprobe_module = {
+ IDE_PROBE_MODULE,
+ ideprobe_init,
+ NULL
+};
+
+int ideprobe_init (void)
+{
+ unsigned int index;
+ int probe[MAX_HWIFS];
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::ideprobe_init\n");
+ }
+
+ MOD_INC_USE_COUNT;
+ memset(probe, 0, MAX_HWIFS * sizeof(int));
+ for (index = 0; index < MAX_HWIFS; ++index)
+ probe[index] = !ide_hwifs[index].present;
+
+ /*
+	 * Probe for drives in the usual way: CMOS/BIOS, then poke at the ports
+ */
+ for (index = 0; index < MAX_HWIFS; ++index)
+ if (probe[index])
+ probe_hwif(&ide_hwifs[index]);
+ for (index = 0; index < MAX_HWIFS; ++index)
+ if (probe[index])
+ hwif_init(&ide_hwifs[index]);
+ if (!ide_probe)
+ ide_probe = &ideprobe_module;
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+#ifdef MODULE
+extern int (*ide_xlate_1024_hook)(kdev_t, int, int, const char *);
+
+int init_module (void)
+{
+ unsigned int index;
+
+ for (index = 0; index < MAX_HWIFS; ++index)
+ ide_unregister(index);
+ ideprobe_init();
+ create_proc_ide_interfaces();
+ ide_xlate_1024_hook = ide_xlate_1024;
+ return 0;
+}
+
+void cleanup_module (void)
+{
+ ide_probe = NULL;
+ ide_xlate_1024_hook = 0;
+}
+MODULE_LICENSE("GPL");
+#endif /* MODULE */
diff --git a/xen/drivers/ide/ide-taskfile.c b/xen/drivers/ide/ide-taskfile.c
new file mode 100644
index 0000000000..6e1286165f
--- /dev/null
+++ b/xen/drivers/ide/ide-taskfile.c
@@ -0,0 +1,1733 @@
+/*
+ * linux/drivers/ide/ide-taskfile.c Version 0.20 Oct 11, 2000
+ *
+ * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
+ * Copyright (C) 2000 Andre Hedrick <andre@linux-ide.org>
+ *
+ * May be copied or modified under the terms of the GNU General Public License
+ *
+ * IDE_DEBUG(__LINE__);
+ */
+
+#include <xeno/config.h>
+#define __NO_VERSION__
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/blkpg.h>
+#include <xeno/slab.h>
+#include <xeno/pci.h>
+#include <xeno/delay.h>
+#include <xeno/hdreg.h>
+#include <xeno/ide.h>
+
+#include <asm/domain_page.h>
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+# define __TASKFILE__IO
+#else /* CONFIG_IDE_TASKFILE_IO */
+# undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+#define DEBUG_TASKFILE 0 /* unset when fixed */
+
+#if DEBUG_TASKFILE
+#define DTF(x...) printk(x)
+#else
+#define DTF(x...)
+#endif
+
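+/*
+ * Read back the 24-bit sector address from the taskfile registers
+ * (high cylinder, low cylinder, sector number).
+ */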
+inline u32 task_read_24 (ide_drive_t *drive)
+{
+ return (IN_BYTE(IDE_HCYL_REG)<<16) |
+ (IN_BYTE(IDE_LCYL_REG)<<8) |
+ IN_BYTE(IDE_SECTOR_REG);
+}
+
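+/*
+ * Byte-swap the 16-bit words of a buffer in place.  wcount counts 32-bit
+ * words, matching the PIO helpers below, hence two swaps per iteration.
+ */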
+static void ata_bswap_data (void *buffer, int wcount)
+{
+ u16 *p = buffer;
+
+ while (wcount--) {
+ *p = *p << 8 | *p >> 8; p++;
+ *p = *p << 8 | *p >> 8; p++;
+ }
+}
+
+#if SUPPORT_VLB_SYNC
+/*
+ * Some localbus EIDE interfaces require a special access sequence
+ * when using 32-bit I/O instructions to transfer data. We call this
+ * the "vlb_sync" sequence, which consists of three successive reads
+ * of the sector count register location, with interrupts disabled
+ * to ensure that the reads all happen together.
+ */
+static inline void task_vlb_sync (ide_ioreg_t port) {
+ (void) inb (port);
+ (void) inb (port);
+ (void) inb (port);
+}
+#endif /* SUPPORT_VLB_SYNC */
+
+/*
+ * This is used for most PIO data transfers *from* the IDE interface
+ */
+void ata_input_data (ide_drive_t *drive, void *vbuffer, unsigned int wcount)
+{
+ byte io_32bit = drive->io_32bit;
+
+ void *buffer = map_domain_mem(virt_to_phys(vbuffer));
+
+ if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+ if (io_32bit & 2) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ task_vlb_sync(IDE_NSECTOR_REG);
+ insl(IDE_DATA_REG, buffer, wcount);
+ __restore_flags(flags); /* local CPU only */
+ } else
+#endif /* SUPPORT_VLB_SYNC */
+ insl(IDE_DATA_REG, buffer, wcount);
+ } else {
+#if SUPPORT_SLOW_DATA_PORTS
+ if (drive->slow) {
+ unsigned short *ptr = (unsigned short *) buffer;
+ while (wcount--) {
+ *ptr++ = inw_p(IDE_DATA_REG);
+ *ptr++ = inw_p(IDE_DATA_REG);
+ }
+ } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+ insw(IDE_DATA_REG, buffer, wcount<<1);
+ }
+
+ unmap_domain_mem(buffer);
+}
+
+/*
+ * This is used for most PIO data transfers *to* the IDE interface
+ */
+void ata_output_data (ide_drive_t *drive, void *vbuffer, unsigned int wcount)
+{
+ byte io_32bit = drive->io_32bit;
+
+ void *buffer = map_domain_mem(virt_to_phys(vbuffer));
+
+ if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+ if (io_32bit & 2) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ task_vlb_sync(IDE_NSECTOR_REG);
+ outsl(IDE_DATA_REG, buffer, wcount);
+ __restore_flags(flags); /* local CPU only */
+ } else
+#endif /* SUPPORT_VLB_SYNC */
+ outsl(IDE_DATA_REG, buffer, wcount);
+ } else {
+#if SUPPORT_SLOW_DATA_PORTS
+ if (drive->slow) {
+ unsigned short *ptr = (unsigned short *) buffer;
+ while (wcount--) {
+ outw_p(*ptr++, IDE_DATA_REG);
+ outw_p(*ptr++, IDE_DATA_REG);
+ }
+ } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+ outsw(IDE_DATA_REG, buffer, wcount<<1);
+ }
+
+ unmap_domain_mem(buffer);
+}
+
+
+static inline void taskfile_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+ ata_input_data(drive, buffer, wcount);
+ if (drive->bswap)
+ ata_bswap_data(buffer, wcount);
+}
+
+static inline void taskfile_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+ if (drive->bswap) {
+ ata_bswap_data(buffer, wcount);
+ ata_output_data(drive, buffer, wcount);
+ ata_bswap_data(buffer, wcount);
+ } else {
+ ata_output_data(drive, buffer, wcount);
+ }
+}
+
+ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
+{
+ task_struct_t *taskfile = (task_struct_t *) task->tfRegister;
+ hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister;
+ struct hd_driveid *id = drive->id;
+ byte HIHI = (drive->addressing) ? 0xE0 : 0xEF;
+
+ printk(KERN_ALERT "do_rw_taskfile\n");
+
+ /* (ks/hs): Moved to start, do not use for multiple out commands */
+ if (task->handler != task_mulout_intr) {
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl, IDE_CONTROL_REG); /* clear nIEN */
+ SELECT_MASK(HWIF(drive), drive, 0);
+ }
+
+ if ((id->command_set_2 & 0x0400) &&
+ (id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ OUT_BYTE(hobfile->feature, IDE_FEATURE_REG);
+ OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+ OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+ OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+ OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+ }
+
+ OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+ OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+ /* refers to number of sectors to transfer */
+ OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+ /* refers to sector offset or start sector */
+ OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+ OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+ OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG);
+ if (task->handler != NULL) {
+#if 0
+ ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+ /*
+ * warning check for race between handler and prehandler for
+ * writing first block of data. however since we are well
+ * inside the boundaries of the seek, we should be okay.
+ */
+ if (task->prehandler != NULL) {
+ return task->prehandler(drive, task->rq);
+ }
+#else
+ ide_startstop_t startstop;
+
+ ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+
+ if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+ printk(KERN_ERR "%s: no DRQ after issuing %s\n",
+ drive->name,
+ drive->mult_count ? "MULTWRITE" : "WRITE");
+ return startstop;
+ }
+ /* (ks/hs): Fixed Multi Write */
+ if ((taskfile->command != WIN_MULTWRITE) &&
+ (taskfile->command != WIN_MULTWRITE_EXT)) {
+ struct request *rq = HWGROUP(drive)->rq;
+ /* For Write_sectors we need to stuff the first sector */
+ taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+ rq->current_nr_sectors--;
+ } else {
+ /* Stuff first sector(s) by implicitly calling the handler */
+ if (!(drive_is_ready(drive))) {
+ /* FIXME: Replace hard-coded 100, error handling? */
+ int i;
+ for (i=0; i<100; i++) {
+ if (drive_is_ready(drive))
+ break;
+ }
+ }
+ return task->handler(drive);
+ }
+#endif
+ } else {
+		/* for DMA commands we don't set the handler; dmaproc starts the transfer */
+		if (drive->using_dma &&
+		    !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) ||
+					    (taskfile->command == WIN_WRITEDMA_EXT))
+					   ? ide_dma_write : ide_dma_read, drive)))
+			;	/* empty body: completion arrives via the DMA interrupt */
+ }
+
+ return ide_started;
+}
+
+void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler)
+{
+ struct hd_driveid *id = drive->id;
+ byte HIHI = (drive->addressing) ? 0xE0 : 0xEF;
+
+ /* (ks/hs): Moved to start, do not use for multiple out commands */
+	if (handler != task_mulout_intr) {
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl, IDE_CONTROL_REG); /* clear nIEN */
+ SELECT_MASK(HWIF(drive), drive, 0);
+ }
+
+ if ((id->command_set_2 & 0x0400) &&
+ (id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ OUT_BYTE(hobfile->feature, IDE_FEATURE_REG);
+ OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+ OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+ OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+ OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+ }
+
+ OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+ OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+ /* refers to number of sectors to transfer */
+ OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+ /* refers to sector offset or start sector */
+ OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+ OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+ OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG);
+ if (handler != NULL) {
+ ide_set_handler (drive, handler, WAIT_CMD, NULL);
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+ } else {
+		/* for DMA commands we don't set the handler; dmaproc starts the transfer */
+		if (drive->using_dma &&
+		    !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) ||
+					    (taskfile->command == WIN_WRITEDMA_EXT))
+					   ? ide_dma_write : ide_dma_read, drive)))
+			;	/* empty body: completion arrives via the DMA interrupt */
+ }
+}
+
+#if 0
+ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task)
+{
+ task_struct_t *taskfile = (task_struct_t *) task->tfRegister;
+ hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister;
+ struct hd_driveid *id = drive->id;
+
+ /*
+ * (KS) Check taskfile in/out flags.
+ * If set, then execute as it is defined.
+ * If not set, then define default settings.
+ * The default values are:
+ * write and read all taskfile registers (except data)
+ * write and read the hob registers (sector,nsector,lcyl,hcyl)
+ */
+ if (task->tf_out_flags.all == 0) {
+ task->tf_out_flags.all = IDE_TASKFILE_STD_OUT_FLAGS;
+ if ((id->command_set_2 & 0x0400) &&
+ (id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+			task->tf_out_flags.all |= (IDE_HOB_STD_OUT_FLAGS << 8);
+ }
+ }
+
+ if (task->tf_in_flags.all == 0) {
+ task->tf_in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
+ if ((id->command_set_2 & 0x0400) &&
+ (id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+			task->tf_in_flags.all |= (IDE_HOB_STD_IN_FLAGS << 8);
+ }
+ }
+
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl, IDE_CONTROL_REG); /* clear nIEN */
+ SELECT_MASK(HWIF(drive), drive, 0);
+
+ if (task->tf_out_flags.b.data) {
+ unsigned short data = taskfile->data + (hobfile->data << 8);
+ OUT_WORD (data, IDE_DATA_REG);
+ }
+
+ /* (KS) send hob registers first */
+ if (task->tf_out_flags.b.nsector_hob)
+ OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+ if (task->tf_out_flags.b.sector_hob)
+ OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+ if (task->tf_out_flags.b.lcyl_hob)
+ OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+ if (task->tf_out_flags.b.hcyl_hob)
+ OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+
+
+ /* (KS) Send now the standard registers */
+ if (task->tf_out_flags.b.error_feature)
+ OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+ /* refers to number of sectors to transfer */
+ if (task->tf_out_flags.b.nsector)
+ OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+ /* refers to sector offset or start sector */
+ if (task->tf_out_flags.b.sector)
+ OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+ if (task->tf_out_flags.b.lcyl)
+ OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+ if (task->tf_out_flags.b.hcyl)
+ OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+ /*
+ * (KS) Do not modify the specified taskfile. We want to have a
+ * universal pass through, so we must execute ALL specified values.
+ *
+ * (KS) The drive head register is mandatory.
+ * Don't care about the out flags !
+ */
+ OUT_BYTE(taskfile->device_head | drive->select.all, IDE_SELECT_REG);
+ if (task->handler != NULL) {
+#if 0
+ ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+ /*
+ * warning check for race between handler and prehandler for
+ * writing first block of data. however since we are well
+ * inside the boundaries of the seek, we should be okay.
+ */
+ if (task->prehandler != NULL) {
+ return task->prehandler(drive, task->rq);
+ }
+#else
+ ide_startstop_t startstop;
+
+ ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+
+ /*
+ * (KS) The drive command register is also mandatory.
+ * Don't care about the out flags !
+ */
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+
+ if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+ printk(KERN_ERR "%s: no DRQ after issuing %s\n",
+ drive->name,
+ drive->mult_count ? "MULTWRITE" : "WRITE");
+ return startstop;
+ }
+ /* (ks/hs): Fixed Multi Write */
+ if ((taskfile->command != WIN_MULTWRITE) &&
+ (taskfile->command != WIN_MULTWRITE_EXT)) {
+ struct request *rq = HWGROUP(drive)->rq;
+ /* For Write_sectors we need to stuff the first sector */
+ taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+ rq->current_nr_sectors--;
+ } else {
+ /* Stuff first sector(s) by implicitly calling the handler */
+ if (!(drive_is_ready(drive))) {
+ /* FIXME: Replace hard-coded 100, error handling? */
+ int i;
+ for (i=0; i<100; i++) {
+ if (drive_is_ready(drive))
+ break;
+ }
+ }
+ return task->handler(drive);
+ }
+#endif
+ } else {
+		/* for DMA commands we don't set the handler; dmaproc starts the transfer */
+		if (drive->using_dma &&
+		    !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) ||
+					    (taskfile->command == WIN_WRITEDMA_EXT))
+					   ? ide_dma_write : ide_dma_read, drive)))
+			;	/* empty body: completion arrives via the DMA interrupt */
+ }
+
+ return ide_started;
+}
+#endif
+
+#if 0
+/*
+ * Error reporting, in human readable form (luxurious, but a memory hog).
+ */
+byte taskfile_dump_status (ide_drive_t *drive, const char *msg, byte stat)
+{
+ unsigned long flags;
+ byte err = 0;
+
+ __save_flags (flags); /* local CPU only */
+ ide__sti(); /* local CPU only */
+ printk("%s: %s: status=0x%02x", drive->name, msg, stat);
+#if FANCY_STATUS_DUMPS
+ printk(" { ");
+ if (stat & BUSY_STAT)
+ printk("Busy ");
+ else {
+ if (stat & READY_STAT) printk("DriveReady ");
+ if (stat & WRERR_STAT) printk("DeviceFault ");
+ if (stat & SEEK_STAT) printk("SeekComplete ");
+ if (stat & DRQ_STAT) printk("DataRequest ");
+ if (stat & ECC_STAT) printk("CorrectedError ");
+ if (stat & INDEX_STAT) printk("Index ");
+ if (stat & ERR_STAT) printk("Error ");
+ }
+ printk("}");
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) {
+ err = GET_ERR();
+ printk("%s: %s: error=0x%02x", drive->name, msg, err);
+#if FANCY_STATUS_DUMPS
+ if (drive->media == ide_disk) {
+ printk(" { ");
+ if (err & ABRT_ERR) printk("DriveStatusError ");
+ if (err & ICRC_ERR) printk("%s", (err & ABRT_ERR) ? "BadCRC " : "BadSector ");
+ if (err & ECC_ERR) printk("UncorrectableError ");
+ if (err & ID_ERR) printk("SectorIdNotFound ");
+ if (err & TRK0_ERR) printk("TrackZeroNotFound ");
+ if (err & MARK_ERR) printk("AddrMarkNotFound ");
+ printk("}");
+ if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) {
+ if ((drive->id->command_set_2 & 0x0400) &&
+ (drive->id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ __u64 sectors = 0;
+ u32 low = 0, high = 0;
+ low = task_read_24(drive);
+ OUT_BYTE(0x80, IDE_CONTROL_REG);
+ high = task_read_24(drive);
+ sectors = ((__u64)high << 24) | low;
+ printk(", LBAsect=%lld", sectors);
+ } else {
+ byte cur = IN_BYTE(IDE_SELECT_REG);
+ if (cur & 0x40) { /* using LBA? */
+ printk(", LBAsect=%ld", (unsigned long)
+ ((cur&0xf)<<24)
+ |(IN_BYTE(IDE_HCYL_REG)<<16)
+ |(IN_BYTE(IDE_LCYL_REG)<<8)
+ | IN_BYTE(IDE_SECTOR_REG));
+ } else {
+ printk(", CHS=%d/%d/%d",
+ (IN_BYTE(IDE_HCYL_REG)<<8) +
+ IN_BYTE(IDE_LCYL_REG),
+ cur & 0xf,
+ IN_BYTE(IDE_SECTOR_REG));
+ }
+ }
+ if (HWGROUP(drive)->rq)
+ printk(", sector=%llu", (__u64) HWGROUP(drive)->rq->sector);
+ }
+ }
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ }
+ __restore_flags (flags); /* local CPU only */
+ return err;
+}
+
+/*
+ * Clean up after success/failure of an explicit taskfile operation.
+ */
+void ide_end_taskfile (ide_drive_t *drive, byte stat, byte err)
+{
+ unsigned long flags;
+ struct request *rq;
+ ide_task_t *args;
+ task_ioreg_t command;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rq = HWGROUP(drive)->rq;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ args = (ide_task_t *) rq->special;
+
+ command = args->tfRegister[IDE_COMMAND_OFFSET];
+
+ rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+
+ args->tfRegister[IDE_ERROR_OFFSET] = err;
+ args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG);
+ args->tfRegister[IDE_SECTOR_OFFSET] = IN_BYTE(IDE_SECTOR_REG);
+ args->tfRegister[IDE_LCYL_OFFSET] = IN_BYTE(IDE_LCYL_REG);
+ args->tfRegister[IDE_HCYL_OFFSET] = IN_BYTE(IDE_HCYL_REG);
+ args->tfRegister[IDE_SELECT_OFFSET] = IN_BYTE(IDE_SELECT_REG);
+ args->tfRegister[IDE_STATUS_OFFSET] = stat;
+ if ((drive->id->command_set_2 & 0x0400) &&
+ (drive->id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB);
+ args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG);
+ args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG);
+ args->hobRegister[IDE_SECTOR_OFFSET_HOB] = IN_BYTE(IDE_SECTOR_REG);
+ args->hobRegister[IDE_LCYL_OFFSET_HOB] = IN_BYTE(IDE_LCYL_REG);
+ args->hobRegister[IDE_HCYL_OFFSET_HOB] = IN_BYTE(IDE_HCYL_REG);
+ }
+
+/* taskfile_settings_update(drive, args, command); */
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ blkdev_dequeue_request(rq);
+ HWGROUP(drive)->rq = NULL;
+ end_that_request_last(rq);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * task_try_to_flush_leftover_data() is invoked in response to a drive
+ * unexpectedly having its DRQ_STAT bit set. As an alternative to
+ * resetting the drive, this routine tries to clear the condition
+ * by reading a sector's worth of data from the drive. Of course,
+ * this may not help if the drive is *waiting* for data from *us*.
+ */
+void task_try_to_flush_leftover_data (ide_drive_t *drive)
+{
+ int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS;
+
+ if (drive->media != ide_disk)
+ return;
+ while (i > 0) {
+ u32 buffer[16];
+ unsigned int wcount = (i > 16) ? 16 : i;
+ i -= wcount;
+ taskfile_input_data (drive, buffer, wcount);
+ }
+}
+
+/*
+ * taskfile_error() takes action based on the error returned by the drive.
+ */
+ide_startstop_t taskfile_error (ide_drive_t *drive, const char *msg, byte stat)
+{
+ struct request *rq;
+ byte err;
+
+ err = taskfile_dump_status(drive, msg, stat);
+ if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
+ return ide_stopped;
+ /* retry only "normal" I/O: */
+ if (rq->cmd == IDE_DRIVE_TASKFILE) {
+ rq->errors = 1;
+ ide_end_taskfile(drive, stat, err);
+ return ide_stopped;
+ }
+ if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */
+ rq->errors |= ERROR_RESET;
+ } else {
+ if (drive->media == ide_disk && (stat & ERR_STAT)) {
+ /* err has different meaning on cdrom and tape */
+ if (err == ABRT_ERR) {
+ if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY)
+ return ide_stopped; /* some newer drives don't support WIN_SPECIFY */
+ } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) {
+ drive->crc_count++; /* UDMA crc error -- just retry the operation */
+ } else if (err & (BBD_ERR | ECC_ERR)) /* retries won't help these */
+ rq->errors = ERROR_MAX;
+ else if (err & TRK0_ERR) /* help it find track zero */
+ rq->errors |= ERROR_RECAL;
+ }
+ if ((stat & DRQ_STAT) && rq->cmd != WRITE)
+ task_try_to_flush_leftover_data(drive);
+ }
+ if (GET_STAT() & (BUSY_STAT|DRQ_STAT))
+ OUT_BYTE(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG); /* force an abort */
+
+ if (rq->errors >= ERROR_MAX) {
+ if (drive->driver != NULL)
+ DRIVER(drive)->end_request(0, HWGROUP(drive));
+ else
+ ide_end_request(0, HWGROUP(drive));
+ } else {
+ if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
+ ++rq->errors;
+ return ide_do_reset(drive);
+ }
+ if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+ drive->special.b.recalibrate = 1;
+ ++rq->errors;
+ }
+ return ide_stopped;
+}
+#endif
+
+/*
+ * Handlers for special commands without a data phase, from ide-disk
+ */
+
+/*
+ * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd.
+ */
+ide_startstop_t set_multmode_intr (ide_drive_t *drive)
+{
+ byte stat;
+
+ if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT)) {
+ drive->mult_count = drive->mult_req;
+ } else {
+ drive->mult_req = drive->mult_count = 0;
+ drive->special.b.recalibrate = 1;
+ (void) ide_dump_status(drive, "set_multmode", stat);
+ }
+ return ide_stopped;
+}
+
+/*
+ * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd.
+ */
+ide_startstop_t set_geometry_intr (ide_drive_t *drive)
+{
+ byte stat;
+
+ if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT))
+ return ide_stopped;
+
+ if (stat & (ERR_STAT|DRQ_STAT))
+ return ide_error(drive, "set_geometry_intr", stat);
+
+ ide_set_handler(drive, &set_geometry_intr, WAIT_CMD, NULL);
+ return ide_started;
+}
+
+/*
+ * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd.
+ */
+ide_startstop_t recal_intr (ide_drive_t *drive)
+{
+ byte stat = GET_STAT();
+
+ if (!OK_STAT(stat,READY_STAT,BAD_STAT))
+ return ide_error(drive, "recal_intr", stat);
+ return ide_stopped;
+}
+
+/*
+ * Handler for commands without a data phase
+ */
+ide_startstop_t task_no_data_intr (ide_drive_t *drive)
+{
+ ide_task_t *args = HWGROUP(drive)->rq->special;
+ byte stat = GET_STAT();
+
+ ide__sti(); /* local CPU only */
+
+ if (!OK_STAT(stat, READY_STAT, BAD_STAT))
+ return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */
+
+ if (args)
+ ide_end_drive_cmd (drive, stat, GET_ERR());
+
+ return ide_stopped;
+}
+
+/*
+ * Handler for command with PIO data-in phase
+ */
+ide_startstop_t task_in_intr (ide_drive_t *drive)
+{
+ byte stat = GET_STAT();
+ byte io_32bit = drive->io_32bit;
+ struct request *rq = HWGROUP(drive)->rq;
+ char *pBuf = NULL;
+
+ if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+ if (stat & (ERR_STAT|DRQ_STAT)) {
+ return ide_error(drive, "task_in_intr", stat);
+ }
+ if (!(stat & BUSY_STAT)) {
+ DTF("task_in_intr to Soon wait for next interrupt\n");
+ ide_set_handler(drive, &task_in_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ }
+ DTF("stat: %02x\n", stat);
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("Read: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors);
+
+ drive->io_32bit = 0;
+ taskfile_input_data(drive, pBuf, SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+
+ if (--rq->current_nr_sectors <= 0) {
+ /* (hs): swapped next 2 lines */
+ DTF("Request Ended stat: %02x\n", GET_STAT());
+ ide_end_request(1, HWGROUP(drive));
+ } else {
+ ide_set_handler(drive, &task_in_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+}
+
+#undef ALTSTAT_SCREW_UP
+
+#ifdef ALTSTAT_SCREW_UP
+/*
+ * (ks/hs): Poll Alternate Status Register to ensure
+ * that drive is not busy.
+ */
+byte altstat_multi_busy (ide_drive_t *drive, byte stat, const char *msg)
+{
+ int i;
+
+ DTF("multi%s: ASR = %x\n", msg, stat);
+ if (stat & BUSY_STAT) {
+ /* (ks/hs): FIXME: Replace hard-coded 100, error handling? */
+ for (i=0; i<100; i++) {
+ stat = GET_ALTSTAT();
+ if ((stat & BUSY_STAT) == 0)
+ break;
+ }
+ }
+ /*
+ * (ks/hs): Read Status AFTER Alternate Status Register
+ */
+ return(GET_STAT());
+}
+
+/*
+ * (ks/hs): Poll Alternate status register to wait for drive
+ * to become ready for next transfer
+ */
+byte altstat_multi_poll (ide_drive_t *drive, byte stat, const char *msg)
+{
+ /* (ks/hs): FIXME: Error handling, time-out? */
+ while (stat & BUSY_STAT)
+ stat = GET_ALTSTAT();
+ DTF("multi%s: nsect=1, ASR = %x\n", msg, stat);
+ return(GET_STAT()); /* (ks/hs): Clear pending IRQ */
+}
+#endif /* ALTSTAT_SCREW_UP */
+
+/*
+ * Handler for the Read Multiple command
+ */
+ide_startstop_t task_mulin_intr (ide_drive_t *drive)
+{
+ unsigned int msect, nsect;
+
+#ifdef ALTSTAT_SCREW_UP
+ byte stat = altstat_multi_busy(drive, GET_ALTSTAT(), "read");
+#else
+ byte stat = GET_STAT();
+#endif /* ALTSTAT_SCREW_UP */
+
+ byte io_32bit = drive->io_32bit;
+ struct request *rq = HWGROUP(drive)->rq;
+ char *pBuf = NULL;
+
+ if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+ if (stat & (ERR_STAT|DRQ_STAT)) {
+ return ide_error(drive, "task_mulin_intr", stat);
+ }
+ /* no data yet, so wait for another interrupt */
+ ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+
+ /* (ks/hs): Fixed Multi-Sector transfer */
+ msect = drive->mult_count;
+
+#ifdef ALTSTAT_SCREW_UP
+ /*
+	 * Screw the request, we do not support bad data-phase setups!
+ * Either read and learn the ATA standard or crash yourself!
+ */
+ if (!msect) {
+ /*
+ * (ks/hs): Drive supports multi-sector transfer,
+ * drive->mult_count was not set
+ */
+ nsect = 1;
+ while (rq->current_nr_sectors) {
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("Multiread: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors -= nsect;
+ stat = altstat_multi_poll(drive, GET_ALTSTAT(), "read");
+ }
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+ }
+#endif /* ALTSTAT_SCREW_UP */
+
+ nsect = (rq->current_nr_sectors > msect) ? msect : rq->current_nr_sectors;
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+
+ DTF("Multiread: %p, nsect: %d , rq->current_nr_sectors: %ld\n",
+ pBuf, nsect, rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors -= nsect;
+ if (rq->current_nr_sectors != 0) {
+ ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+}
+
+ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq)
+{
+ ide_task_t *args = rq->special;
+ ide_startstop_t startstop;
+
+ if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+ printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, drive->mult_count ? "MULTWRITE" : "WRITE");
+ return startstop;
+ }
+
+ /* (ks/hs): Fixed Multi Write */
+ if ((args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE) &&
+ (args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE_EXT)) {
+ /* For Write_sectors we need to stuff the first sector */
+ taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+ rq->current_nr_sectors--;
+ return ide_started;
+ } else {
+ /*
+ * (ks/hs): Stuff the first sector(s)
+ * by implicitly calling the handler
+ */
+ if (!(drive_is_ready(drive))) {
+ int i;
+ /*
+ * (ks/hs): FIXME: Replace hard-coded
+ * 100, error handling?
+ */
+ for (i=0; i<100; i++) {
+ if (drive_is_ready(drive))
+ break;
+ }
+ }
+ return args->handler(drive);
+ }
+ return ide_started;
+}
+
+/*
+ * Handler for command with PIO data-out phase
+ */
+ide_startstop_t task_out_intr (ide_drive_t *drive)
+{
+ byte stat = GET_STAT();
+ byte io_32bit = drive->io_32bit;
+ struct request *rq = HWGROUP(drive)->rq;
+ char *pBuf = NULL;
+
+ if (!rq->current_nr_sectors) {
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+ }
+
+ if (!OK_STAT(stat,DRIVE_READY,drive->bad_wstat)) {
+ return ide_error(drive, "task_out_intr", stat);
+ }
+ if ((rq->current_nr_sectors==1) ^ (stat & DRQ_STAT)) {
+ rq = HWGROUP(drive)->rq;
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("write: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_output_data(drive, pBuf, SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors--;
+ }
+
+ if (rq->current_nr_sectors <= 0) {
+ ide_end_request(1, HWGROUP(drive));
+ } else {
+ ide_set_handler(drive, &task_out_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+}
+
+/*
+ * Handler for the Write Multiple command.
+ * Called directly from execute_drive_cmd for the first bunch of sectors,
+ * afterwards only by the ISR
+ */
+ide_startstop_t task_mulout_intr (ide_drive_t *drive)
+{
+ unsigned int msect, nsect;
+
+#ifdef ALTSTAT_SCREW_UP
+ byte stat = altstat_multi_busy(drive, GET_ALTSTAT(), "write");
+#else
+ byte stat = GET_STAT();
+#endif /* ALTSTAT_SCREW_UP */
+
+ byte io_32bit = drive->io_32bit;
+ struct request *rq = HWGROUP(drive)->rq;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ char *pBuf = NULL;
+
+ /*
+	 * (ks/hs): Handle the last IRQ of a multi-sector transfer; it
+	 * occurs after all data has been sent
+ */
+ if (rq->current_nr_sectors == 0) {
+ if (stat & (ERR_STAT|DRQ_STAT))
+ return ide_error(drive, "task_mulout_intr", stat);
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+ }
+
+ if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+ if (stat & (ERR_STAT|DRQ_STAT)) {
+ return ide_error(drive, "task_mulout_intr", stat);
+ }
+ /* no data yet, so wait for another interrupt */
+ if (hwgroup->handler == NULL)
+ ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+
+ /* (ks/hs): See task_mulin_intr */
+ msect = drive->mult_count;
+
+#ifdef ALTSTAT_SCREW_UP
+ /*
+	 * Screw the request, we do not support bad data-phase setups!
+ * Either read and learn the ATA standard or crash yourself!
+ */
+ if (!msect) {
+ nsect = 1;
+ while (rq->current_nr_sectors) {
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("Multiwrite: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors -= nsect;
+ stat = altstat_multi_poll(drive, GET_ALTSTAT(), "write");
+ }
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+ }
+#endif /* ALTSTAT_SCREW_UP */
+
+ nsect = (rq->current_nr_sectors > msect) ? msect : rq->current_nr_sectors;
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("Multiwrite: %p, nsect: %d , rq->current_nr_sectors: %ld\n",
+ pBuf, nsect, rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors -= nsect;
+ if (hwgroup->handler == NULL)
+ ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL);
+ return ide_started;
+}
+
+/* Called internally to figure out the type of command being issued */
+ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile)
+{
+ switch(taskfile->command) {
+ /* IDE_DRIVE_TASK_RAW_WRITE */
+ case CFA_WRITE_MULTI_WO_ERASE:
+ case WIN_MULTWRITE:
+ case WIN_MULTWRITE_EXT:
+// case WIN_WRITEDMA:
+// case WIN_WRITEDMA_QUEUED:
+// case WIN_WRITEDMA_EXT:
+// case WIN_WRITEDMA_QUEUED_EXT:
+ /* IDE_DRIVE_TASK_OUT */
+ case WIN_WRITE:
+ case WIN_WRITE_VERIFY:
+ case WIN_WRITE_BUFFER:
+ case CFA_WRITE_SECT_WO_ERASE:
+ case WIN_DOWNLOAD_MICROCODE:
+ return &pre_task_out_intr;
+ /* IDE_DRIVE_TASK_OUT */
+ case WIN_SMART:
+ if (taskfile->feature == SMART_WRITE_LOG_SECTOR)
+ return &pre_task_out_intr;
+ default:
+ break;
+ }
+ return(NULL);
+}
+
+/* Called internally to figure out the type of command being issued */
+ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile)
+{
+ switch(taskfile->command) {
+ case WIN_IDENTIFY:
+ case WIN_PIDENTIFY:
+ case CFA_TRANSLATE_SECTOR:
+ case WIN_READ_BUFFER:
+ case WIN_READ:
+ case WIN_READ_EXT:
+ return &task_in_intr;
+ case WIN_SECURITY_DISABLE:
+ case WIN_SECURITY_ERASE_UNIT:
+ case WIN_SECURITY_SET_PASS:
+ case WIN_SECURITY_UNLOCK:
+ case WIN_DOWNLOAD_MICROCODE:
+ case CFA_WRITE_SECT_WO_ERASE:
+ case WIN_WRITE_BUFFER:
+ case WIN_WRITE_VERIFY:
+ case WIN_WRITE:
+ case WIN_WRITE_EXT:
+ return &task_out_intr;
+ case WIN_MULTREAD:
+ case WIN_MULTREAD_EXT:
+ return &task_mulin_intr;
+ case CFA_WRITE_MULTI_WO_ERASE:
+ case WIN_MULTWRITE:
+ case WIN_MULTWRITE_EXT:
+ return &task_mulout_intr;
+ case WIN_SMART:
+ switch(taskfile->feature) {
+ case SMART_READ_VALUES:
+ case SMART_READ_THRESHOLDS:
+ case SMART_READ_LOG_SECTOR:
+ return &task_in_intr;
+ case SMART_WRITE_LOG_SECTOR:
+ return &task_out_intr;
+ default:
+ return &task_no_data_intr;
+ }
+ case CFA_REQ_EXT_ERROR_CODE:
+ case CFA_ERASE_SECTORS:
+ case WIN_VERIFY:
+ case WIN_VERIFY_EXT:
+ case WIN_SEEK:
+ return &task_no_data_intr;
+ case WIN_SPECIFY:
+ return &set_geometry_intr;
+ case WIN_RESTORE:
+ return &recal_intr;
+ case WIN_DIAGNOSE:
+ case WIN_FLUSH_CACHE:
+ case WIN_FLUSH_CACHE_EXT:
+ case WIN_STANDBYNOW1:
+ case WIN_STANDBYNOW2:
+ case WIN_SLEEPNOW1:
+ case WIN_SLEEPNOW2:
+ case WIN_SETIDLE1:
+ case WIN_CHECKPOWERMODE1:
+ case WIN_CHECKPOWERMODE2:
+ case WIN_GETMEDIASTATUS:
+ case WIN_MEDIAEJECT:
+ return &task_no_data_intr;
+ case WIN_SETMULT:
+ return &set_multmode_intr;
+ case WIN_READ_NATIVE_MAX:
+ case WIN_SET_MAX:
+ case WIN_READ_NATIVE_MAX_EXT:
+ case WIN_SET_MAX_EXT:
+ case WIN_SECURITY_ERASE_PREPARE:
+ case WIN_SECURITY_FREEZE_LOCK:
+ case WIN_DOORLOCK:
+ case WIN_DOORUNLOCK:
+ case WIN_SETFEATURES:
+ return &task_no_data_intr;
+ case DISABLE_SEAGATE:
+ case EXABYTE_ENABLE_NEST:
+ return &task_no_data_intr;
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ case WIN_READDMA:
+ case WIN_IDENTIFY_DMA:
+ case WIN_READDMA_QUEUED:
+ case WIN_READDMA_EXT:
+ case WIN_READDMA_QUEUED_EXT:
+ case WIN_WRITEDMA:
+ case WIN_WRITEDMA_QUEUED:
+ case WIN_WRITEDMA_EXT:
+ case WIN_WRITEDMA_QUEUED_EXT:
+#endif
+ case WIN_FORMAT:
+ case WIN_INIT:
+ case WIN_DEVICE_RESET:
+ case WIN_QUEUED_SERVICE:
+ case WIN_PACKETCMD:
+ default:
+ return(NULL);
+ }
+}
+
+/* Called by the ioctl path to figure out the type of command being issued */
+int ide_cmd_type_parser (ide_task_t *args)
+{
+ struct hd_drive_task_hdr *taskfile = (struct hd_drive_task_hdr *) args->tfRegister;
+ struct hd_drive_hob_hdr *hobfile = (struct hd_drive_hob_hdr *) args->hobRegister;
+
+ args->prehandler = ide_pre_handler_parser(taskfile, hobfile);
+ args->handler = ide_handler_parser(taskfile, hobfile);
+
+ switch(args->tfRegister[IDE_COMMAND_OFFSET]) {
+ case WIN_IDENTIFY:
+ case WIN_PIDENTIFY:
+ return IDE_DRIVE_TASK_IN;
+ case CFA_TRANSLATE_SECTOR:
+ case WIN_READ:
+ case WIN_READ_BUFFER:
+ return IDE_DRIVE_TASK_IN;
+ case WIN_WRITE:
+ case WIN_WRITE_VERIFY:
+ case WIN_WRITE_BUFFER:
+ case CFA_WRITE_SECT_WO_ERASE:
+ case WIN_DOWNLOAD_MICROCODE:
+ return IDE_DRIVE_TASK_RAW_WRITE;
+ case WIN_MULTREAD:
+ return IDE_DRIVE_TASK_IN;
+ case CFA_WRITE_MULTI_WO_ERASE:
+ case WIN_MULTWRITE:
+ return IDE_DRIVE_TASK_RAW_WRITE;
+ case WIN_SECURITY_DISABLE:
+ case WIN_SECURITY_ERASE_UNIT:
+ case WIN_SECURITY_SET_PASS:
+ case WIN_SECURITY_UNLOCK:
+ return IDE_DRIVE_TASK_OUT;
+ case WIN_SMART:
+ args->tfRegister[IDE_LCYL_OFFSET] = SMART_LCYL_PASS;
+ args->tfRegister[IDE_HCYL_OFFSET] = SMART_HCYL_PASS;
+ switch(args->tfRegister[IDE_FEATURE_OFFSET]) {
+ case SMART_READ_VALUES:
+ case SMART_READ_THRESHOLDS:
+ case SMART_READ_LOG_SECTOR:
+ return IDE_DRIVE_TASK_IN;
+ case SMART_WRITE_LOG_SECTOR:
+ return IDE_DRIVE_TASK_OUT;
+ default:
+ return IDE_DRIVE_TASK_NO_DATA;
+ }
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ case WIN_READDMA:
+ case WIN_IDENTIFY_DMA:
+ case WIN_READDMA_QUEUED:
+ case WIN_READDMA_EXT:
+ case WIN_READDMA_QUEUED_EXT:
+ return IDE_DRIVE_TASK_IN;
+ case WIN_WRITEDMA:
+ case WIN_WRITEDMA_QUEUED:
+ case WIN_WRITEDMA_EXT:
+ case WIN_WRITEDMA_QUEUED_EXT:
+ return IDE_DRIVE_TASK_RAW_WRITE;
+#endif
+ case WIN_SETFEATURES:
+ switch(args->tfRegister[IDE_FEATURE_OFFSET]) {
+ case SETFEATURES_XFER:
+ return IDE_DRIVE_TASK_SET_XFER;
+ case SETFEATURES_DIS_DEFECT:
+ case SETFEATURES_EN_APM:
+ case SETFEATURES_DIS_MSN:
+ case SETFEATURES_EN_RI:
+ case SETFEATURES_EN_SI:
+ case SETFEATURES_DIS_RPOD:
+ case SETFEATURES_DIS_WCACHE:
+ case SETFEATURES_EN_DEFECT:
+ case SETFEATURES_DIS_APM:
+ case SETFEATURES_EN_MSN:
+ case SETFEATURES_EN_RLA:
+ case SETFEATURES_PREFETCH:
+ case SETFEATURES_EN_RPOD:
+ case SETFEATURES_DIS_RI:
+ case SETFEATURES_DIS_SI:
+ default:
+ return IDE_DRIVE_TASK_NO_DATA;
+ }
+ case WIN_NOP:
+ case CFA_REQ_EXT_ERROR_CODE:
+ case CFA_ERASE_SECTORS:
+ case WIN_VERIFY:
+ case WIN_VERIFY_EXT:
+ case WIN_SEEK:
+ case WIN_SPECIFY:
+ case WIN_RESTORE:
+ case WIN_DIAGNOSE:
+ case WIN_FLUSH_CACHE:
+ case WIN_FLUSH_CACHE_EXT:
+ case WIN_STANDBYNOW1:
+ case WIN_STANDBYNOW2:
+ case WIN_SLEEPNOW1:
+ case WIN_SLEEPNOW2:
+ case WIN_SETIDLE1:
+ case DISABLE_SEAGATE:
+ case WIN_CHECKPOWERMODE1:
+ case WIN_CHECKPOWERMODE2:
+ case WIN_GETMEDIASTATUS:
+ case WIN_MEDIAEJECT:
+ case WIN_SETMULT:
+ case WIN_READ_NATIVE_MAX:
+ case WIN_SET_MAX:
+ case WIN_READ_NATIVE_MAX_EXT:
+ case WIN_SET_MAX_EXT:
+ case WIN_SECURITY_ERASE_PREPARE:
+ case WIN_SECURITY_FREEZE_LOCK:
+ case EXABYTE_ENABLE_NEST:
+ case WIN_DOORLOCK:
+ case WIN_DOORUNLOCK:
+ return IDE_DRIVE_TASK_NO_DATA;
+ case WIN_FORMAT:
+ case WIN_INIT:
+ case WIN_DEVICE_RESET:
+ case WIN_QUEUED_SERVICE:
+ case WIN_PACKETCMD:
+ default:
+ return IDE_DRIVE_TASK_INVALID;
+ }
+}
+
+/*
+ * This function is intended to be used prior to invoking ide_do_drive_cmd().
+ */
+void ide_init_drive_taskfile (struct request *rq)
+{
+ memset(rq, 0, sizeof(*rq));
+ rq->cmd = IDE_DRIVE_TASK_NO_DATA;
+}
+
+/*
+ * This is kept for internal use only !!!
+ * It is an internal call and nobody in user-space has a damn
+ * reason to invoke this taskfile path directly.
+ *
+ * ide_raw_taskfile is the one through which user-space requests run.
+ */
+int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf)
+{
+ struct request rq;
+ ide_task_t args;
+
+ memset(&args, 0, sizeof(ide_task_t));
+
+ args.tfRegister[IDE_DATA_OFFSET] = taskfile->data;
+ args.tfRegister[IDE_FEATURE_OFFSET] = taskfile->feature;
+ args.tfRegister[IDE_NSECTOR_OFFSET] = taskfile->sector_count;
+ args.tfRegister[IDE_SECTOR_OFFSET] = taskfile->sector_number;
+ args.tfRegister[IDE_LCYL_OFFSET] = taskfile->low_cylinder;
+ args.tfRegister[IDE_HCYL_OFFSET] = taskfile->high_cylinder;
+ args.tfRegister[IDE_SELECT_OFFSET] = taskfile->device_head;
+ args.tfRegister[IDE_COMMAND_OFFSET] = taskfile->command;
+
+ args.hobRegister[IDE_DATA_OFFSET_HOB] = hobfile->data;
+ args.hobRegister[IDE_FEATURE_OFFSET_HOB] = hobfile->feature;
+ args.hobRegister[IDE_NSECTOR_OFFSET_HOB] = hobfile->sector_count;
+ args.hobRegister[IDE_SECTOR_OFFSET_HOB] = hobfile->sector_number;
+ args.hobRegister[IDE_LCYL_OFFSET_HOB] = hobfile->low_cylinder;
+ args.hobRegister[IDE_HCYL_OFFSET_HOB] = hobfile->high_cylinder;
+ args.hobRegister[IDE_SELECT_OFFSET_HOB] = hobfile->device_head;
+ args.hobRegister[IDE_CONTROL_OFFSET_HOB] = hobfile->control;
+
+ ide_init_drive_taskfile(&rq);
+ /* This is kept for internal use only !!! */
+ args.command_type = ide_cmd_type_parser (&args);
+ if (args.command_type != IDE_DRIVE_TASK_NO_DATA)
+ rq.current_nr_sectors = rq.nr_sectors = (hobfile->sector_count << 8) | taskfile->sector_count;
+
+ rq.cmd = IDE_DRIVE_TASKFILE;
+ rq.buffer = buf;
+ rq.special = &args;
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *args, byte *buf)
+{
+ struct request rq;
+ ide_init_drive_taskfile(&rq);
+ rq.cmd = IDE_DRIVE_TASKFILE;
+ rq.buffer = buf;
+
+ if (args->command_type != IDE_DRIVE_TASK_NO_DATA)
+ rq.current_nr_sectors = rq.nr_sectors = (args->hobRegister[IDE_NSECTOR_OFFSET_HOB] << 8) | args->tfRegister[IDE_NSECTOR_OFFSET];
+
+ rq.special = args;
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+
+#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG
+char * ide_ioctl_verbose (unsigned int cmd)
+{
+ return("unknown");
+}
+
+char * ide_task_cmd_verbose (byte task)
+{
+ return("unknown");
+}
+#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */
+
+/*
+ * The taskfile glue table
+ *
+ * reqtask.data_phase reqtask.req_cmd
+ * args.command_type args.handler
+ *
+ * TASKFILE_P_OUT_DMAQ ?? ??
+ * TASKFILE_P_IN_DMAQ ?? ??
+ * TASKFILE_P_OUT_DMA ?? ??
+ * TASKFILE_P_IN_DMA ?? ??
+ * TASKFILE_P_OUT ?? ??
+ * TASKFILE_P_IN ?? ??
+ *
+ * TASKFILE_OUT_DMAQ IDE_DRIVE_TASK_RAW_WRITE NULL
+ * TASKFILE_IN_DMAQ IDE_DRIVE_TASK_IN NULL
+ *
+ * TASKFILE_OUT_DMA IDE_DRIVE_TASK_RAW_WRITE NULL
+ * TASKFILE_IN_DMA IDE_DRIVE_TASK_IN NULL
+ *
+ * TASKFILE_IN_OUT ?? ??
+ *
+ * TASKFILE_MULTI_OUT IDE_DRIVE_TASK_RAW_WRITE task_mulout_intr
+ * TASKFILE_MULTI_IN IDE_DRIVE_TASK_IN task_mulin_intr
+ *
+ * TASKFILE_OUT IDE_DRIVE_TASK_RAW_WRITE task_out_intr
+ * TASKFILE_OUT IDE_DRIVE_TASK_OUT task_out_intr
+ *
+ * TASKFILE_IN IDE_DRIVE_TASK_IN task_in_intr
+ * TASKFILE_NO_DATA IDE_DRIVE_TASK_NO_DATA task_no_data_intr
+ *
+ * IDE_DRIVE_TASK_SET_XFER task_no_data_intr
+ * IDE_DRIVE_TASK_INVALID
+ *
+ */
+
+#define MAX_DMA (256*SECTOR_WORDS)
+
+int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ ide_task_request_t *req_task;
+ ide_task_t args;
+
+ byte *outbuf = NULL;
+ byte *inbuf = NULL;
+ task_ioreg_t *argsptr = args.tfRegister;
+ task_ioreg_t *hobsptr = args.hobRegister;
+ int err = 0;
+ int tasksize = sizeof(struct ide_task_request_s);
+ int taskin = 0;
+ int taskout = 0;
+
+ req_task = kmalloc(tasksize, GFP_KERNEL);
+ if (req_task == NULL) return -ENOMEM;
+ memset(req_task, 0, tasksize);
+ if (copy_from_user(req_task, (void *) arg, tasksize)) {
+ kfree(req_task);
+ return -EFAULT;
+ }
+
+ taskout = (int) req_task->out_size;
+ taskin = (int) req_task->in_size;
+
+ if (taskout) {
+ int outtotal = tasksize;
+ outbuf = kmalloc(taskout, GFP_KERNEL);
+ if (outbuf == NULL) {
+ err = -ENOMEM;
+ goto abort;
+ }
+ memset(outbuf, 0, taskout);
+ if (copy_from_user(outbuf, (void *)arg + outtotal, taskout)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ }
+
+ if (taskin) {
+ int intotal = tasksize + taskout;
+ inbuf = kmalloc(taskin, GFP_KERNEL);
+ if (inbuf == NULL) {
+ err = -ENOMEM;
+ goto abort;
+ }
+ memset(inbuf, 0, taskin);
+ if (copy_from_user(inbuf, (void *)arg + intotal , taskin)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ }
+
+ memset(argsptr, 0, HDIO_DRIVE_TASK_HDR_SIZE);
+ memset(hobsptr, 0, HDIO_DRIVE_HOB_HDR_SIZE);
+ memcpy(argsptr, req_task->io_ports, HDIO_DRIVE_TASK_HDR_SIZE);
+ memcpy(hobsptr, req_task->hob_ports, HDIO_DRIVE_HOB_HDR_SIZE);
+
+ args.tf_in_flags = req_task->in_flags;
+ args.tf_out_flags = req_task->out_flags;
+ args.data_phase = req_task->data_phase;
+ args.command_type = req_task->req_cmd;
+
+#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG
+ DTF("%s: ide_ioctl_cmd %s: ide_task_cmd %s\n",
+ drive->name,
+ ide_ioctl_verbose(cmd),
+ ide_task_cmd_verbose(args.tfRegister[IDE_COMMAND_OFFSET]));
+#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */
+
+ switch(req_task->data_phase) {
+ case TASKFILE_OUT_DMAQ:
+ case TASKFILE_OUT_DMA:
+ args.prehandler = NULL;
+ args.handler = NULL;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, outbuf);
+ break;
+ case TASKFILE_IN_DMAQ:
+ case TASKFILE_IN_DMA:
+ args.prehandler = NULL;
+ args.handler = NULL;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, inbuf);
+ break;
+ case TASKFILE_IN_OUT:
+#if 0
+ args.prehandler = &pre_task_out_intr;
+ args.handler = &task_out_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, outbuf);
+ args.prehandler = NULL;
+ args.handler = &task_in_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, inbuf);
+ break;
+#else
+ err = -EFAULT;
+ goto abort;
+#endif
+ case TASKFILE_MULTI_OUT:
+ if (drive->mult_count) {
+ args.prehandler = &pre_task_out_intr;
+ args.handler = &task_mulout_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, outbuf);
+ } else {
+ /* (hs): give up if multcount is not set */
+ printk("%s: %s Multimode Write " \
+ "multcount is not set\n",
+ drive->name, __FUNCTION__);
+ err = -EPERM;
+ goto abort;
+ }
+ break;
+ case TASKFILE_OUT:
+ args.prehandler = &pre_task_out_intr;
+ args.handler = &task_out_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, outbuf);
+ break;
+ case TASKFILE_MULTI_IN:
+ if (drive->mult_count) {
+ args.prehandler = NULL;
+ args.handler = &task_mulin_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, inbuf);
+ } else {
+ /* (hs): give up if multcount is not set */
+ printk("%s: %s Multimode Read failure " \
+ "multcount is not set\n",
+ drive->name, __FUNCTION__);
+ err = -EPERM;
+ goto abort;
+ }
+ break;
+ case TASKFILE_IN:
+ args.prehandler = NULL;
+ args.handler = &task_in_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, inbuf);
+ break;
+ case TASKFILE_NO_DATA:
+ args.prehandler = NULL;
+ args.handler = &task_no_data_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, NULL);
+ break;
+ default:
+ args.prehandler = NULL;
+ args.handler = NULL;
+ args.posthandler = NULL;
+ err = -EFAULT;
+ goto abort;
+ }
+
+ memcpy(req_task->io_ports, &(args.tfRegister), HDIO_DRIVE_TASK_HDR_SIZE);
+ memcpy(req_task->hob_ports, &(args.hobRegister), HDIO_DRIVE_HOB_HDR_SIZE);
+ req_task->in_flags = args.tf_in_flags;
+ req_task->out_flags = args.tf_out_flags;
+
+ if (copy_to_user((void *)arg, req_task, tasksize)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ if (taskout) {
+ int outtotal = tasksize;
+ if (copy_to_user((void *)arg+outtotal, outbuf, taskout)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ }
+ if (taskin) {
+ int intotal = tasksize + taskout;
+ if (copy_to_user((void *)arg+intotal, inbuf, taskin)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ }
+abort:
+ kfree(req_task);
+ if (outbuf != NULL)
+ kfree(outbuf);
+ if (inbuf != NULL)
+ kfree(inbuf);
+ return err;
+}
+
+EXPORT_SYMBOL(task_read_24);
+EXPORT_SYMBOL(do_rw_taskfile);
+EXPORT_SYMBOL(do_taskfile);
+// EXPORT_SYMBOL(flagged_taskfile);
+
+//EXPORT_SYMBOL(ide_end_taskfile);
+
+EXPORT_SYMBOL(set_multmode_intr);
+EXPORT_SYMBOL(set_geometry_intr);
+EXPORT_SYMBOL(recal_intr);
+
+EXPORT_SYMBOL(task_no_data_intr);
+EXPORT_SYMBOL(task_in_intr);
+EXPORT_SYMBOL(task_mulin_intr);
+EXPORT_SYMBOL(pre_task_out_intr);
+EXPORT_SYMBOL(task_out_intr);
+EXPORT_SYMBOL(task_mulout_intr);
+
+EXPORT_SYMBOL(ide_init_drive_taskfile);
+EXPORT_SYMBOL(ide_wait_taskfile);
+EXPORT_SYMBOL(ide_raw_taskfile);
+EXPORT_SYMBOL(ide_pre_handler_parser);
+EXPORT_SYMBOL(ide_handler_parser);
+EXPORT_SYMBOL(ide_cmd_type_parser);
+EXPORT_SYMBOL(ide_taskfile_ioctl);
+
+#ifdef CONFIG_PKT_TASK_IOCTL
+
+#if 0
+{
+
+{ /* start cdrom */
+
+ struct cdrom_info *info = drive->driver_data;
+
+ if (info->dma) {
+ if (info->cmd == READ) {
+ info->dma = !HWIF(drive)->dmaproc(ide_dma_read, drive);
+ } else if (info->cmd == WRITE) {
+ info->dma = !HWIF(drive)->dmaproc(ide_dma_write, drive);
+ } else {
+ printk("ide-cd: DMA set, but not allowed\n");
+ }
+ }
+
+ /* Set up the controller registers. */
+ OUT_BYTE (info->dma, IDE_FEATURE_REG);
+ OUT_BYTE (0, IDE_NSECTOR_REG);
+ OUT_BYTE (0, IDE_SECTOR_REG);
+
+ OUT_BYTE (xferlen & 0xff, IDE_LCYL_REG);
+ OUT_BYTE (xferlen >> 8 , IDE_HCYL_REG);
+ if (IDE_CONTROL_REG)
+ OUT_BYTE (drive->ctl, IDE_CONTROL_REG);
+
+ if (info->dma)
+ (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+
+ if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) {
+ ide_set_handler (drive, handler, WAIT_CMD, cdrom_timer_expiry);
+ OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */
+ return ide_started;
+ } else {
+ OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */
+ return (*handler) (drive);
+ }
+
+} /* end cdrom */
+
+{ /* start floppy */
+
+ idefloppy_floppy_t *floppy = drive->driver_data;
+ idefloppy_bcount_reg_t bcount;
+ int dma_ok = 0;
+
+ floppy->pc=pc; /* Set the current packet command */
+
+ pc->retries++;
+ pc->actually_transferred=0; /* We haven't transferred any data yet */
+ pc->current_position=pc->buffer;
+ bcount.all = IDE_MIN(pc->request_transfer, 63 * 1024);
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) {
+ (void) HWIF(drive)->dmaproc(ide_dma_off, drive);
+ }
+ if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
+ dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+ if (IDE_CONTROL_REG)
+ OUT_BYTE (drive->ctl,IDE_CONTROL_REG);
+ OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG); /* Use PIO/DMA */
+ OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG);
+ OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG);
+ OUT_BYTE (drive->select.all,IDE_SELECT_REG);
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (dma_ok) { /* Begin DMA, if necessary */
+ set_bit (PC_DMA_IN_PROGRESS, &pc->flags);
+ (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+ }
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+} /* end floppy */
+
+{ /* start tape */
+
+ idetape_tape_t *tape = drive->driver_data;
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) {
+ printk (KERN_WARNING "ide-tape: DMA disabled, reverting to PIO\n");
+ (void) HWIF(drive)->dmaproc(ide_dma_off, drive);
+ }
+ if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
+ dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+ if (IDE_CONTROL_REG)
+ OUT_BYTE (drive->ctl,IDE_CONTROL_REG);
+ OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG); /* Use PIO/DMA */
+ OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG);
+ OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG);
+ OUT_BYTE (drive->select.all,IDE_SELECT_REG);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (dma_ok) { /* Begin DMA, if necessary */
+ set_bit (PC_DMA_IN_PROGRESS, &pc->flags);
+ (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+ }
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ if (test_bit(IDETAPE_DRQ_INTERRUPT, &tape->flags)) {
+ ide_set_handler(drive, &idetape_transfer_pc, IDETAPE_WAIT_CMD, NULL);
+ OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG);
+ return ide_started;
+ } else {
+ OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG);
+ return idetape_transfer_pc(drive);
+ }
+
+} /* end tape */
+
+}
+#endif
+
+int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+#if 0
+ switch(req_task->data_phase) {
+ case TASKFILE_P_OUT_DMAQ:
+ case TASKFILE_P_IN_DMAQ:
+ case TASKFILE_P_OUT_DMA:
+ case TASKFILE_P_IN_DMA:
+ case TASKFILE_P_OUT:
+ case TASKFILE_P_IN:
+ }
+#endif
+ return -ENOMSG;
+}
+
+EXPORT_SYMBOL(pkt_taskfile_ioctl);
+
+#endif /* CONFIG_PKT_TASK_IOCTL */
diff --git a/xen/drivers/ide/ide-xeno.c b/xen/drivers/ide/ide-xeno.c
new file mode 100644
index 0000000000..0b7e481ddf
--- /dev/null
+++ b/xen/drivers/ide/ide-xeno.c
@@ -0,0 +1,41 @@
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/ide.h>
+#include <hypervisor-ifs/block.h>
+#include <asm/domain_page.h>
+#include <asm/io.h>
+
+void ide_probe_devices (xen_disk_info_t* xdi)
+{
+ int loop;
+ unsigned int unit;
+ xen_disk_info_t *xen_xdi = map_domain_mem(virt_to_phys(xdi));
+
+ for (loop = 0; loop < MAX_HWIFS; ++loop) {
+
+ ide_hwif_t *hwif = &ide_hwifs[loop];
+ if (hwif->present) {
+
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ unsigned long capacity;
+ ide_drive_t *drive = &hwif->drives[unit];
+
+ if (drive->present) {
+ capacity = current_capacity (drive);
+ xen_xdi->disks[xen_xdi->count].type = XEN_DISK_IDE;
+ xen_xdi->disks[xen_xdi->count].capacity = capacity;
+ xen_xdi->count++;
+
+ printk (KERN_ALERT "IDE-XENO %d\n", xen_xdi->count);
+ printk (KERN_ALERT " capacity 0x%lx\n", capacity);
+ printk (KERN_ALERT " head 0x%x\n", drive->bios_head);
+ printk (KERN_ALERT " sector 0x%x\n", drive->bios_sect);
+ printk (KERN_ALERT " cylinder 0x%x\n", drive->bios_cyl);
+ }
+ }
+ }
+ }
+
+ unmap_domain_mem(xen_xdi);
+}
diff --git a/xen/drivers/ide/ide.c b/xen/drivers/ide/ide.c
new file mode 100644
index 0000000000..1db4e34834
--- /dev/null
+++ b/xen/drivers/ide/ide.c
@@ -0,0 +1,4197 @@
+/*
+ * linux/drivers/ide/ide.c Version 6.31 June 9, 2000
+ *
+ * Copyright (C) 1994-1998 Linus Torvalds & authors (see below)
+ */
+
+/*
+ * Mostly written by Mark Lord <mlord@pobox.com>
+ * and Gadi Oxman <gadio@netvision.net.il>
+ * and Andre Hedrick <andre@linux-ide.org>
+ *
+ * See linux/MAINTAINERS for address of current maintainer.
+ *
+ * This is the multiple IDE interface driver, as evolved from hd.c.
+ * It supports up to MAX_HWIFS IDE interfaces, on one or more IRQs (usually 14 & 15).
+ * There can be up to two drives per interface, as per the ATA-2 spec.
+ *
+ * Primary: ide0, port 0x1f0; major=3; hda is minor=0; hdb is minor=64
+ * Secondary: ide1, port 0x170; major=22; hdc is minor=0; hdd is minor=64
+ * Tertiary: ide2, port 0x???; major=33; hde is minor=0; hdf is minor=64
+ * Quaternary: ide3, port 0x???; major=34; hdg is minor=0; hdh is minor=64
+ * ...
+ *
+ * From hd.c:
+ * |
+ * | It traverses the request-list, using interrupts to jump between functions.
+ * | As nearly all functions can be called within interrupts, we may not sleep.
+ * | Special care is recommended. Have Fun!
+ * |
+ * | modified by Drew Eckhardt to check nr of hd's from the CMOS.
+ * |
+ * | Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
+ * | in the early extended-partition checks and added DM partitions.
+ * |
+ * | Early work on error handling by Mika Liljeberg (liljeber@cs.Helsinki.FI).
+ * |
+ * | IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
+ * | and general streamlining by Mark Lord (mlord@pobox.com).
+ *
+ * October, 1994 -- Complete line-by-line overhaul for linux 1.1.x, by:
+ *
+ * Mark Lord (mlord@pobox.com) (IDE Perf.Pkg)
+ * Delman Lee (delman@ieee.org) ("Mr. atdisk2")
+ * Scott Snyder (snyder@fnald0.fnal.gov) (ATAPI IDE cd-rom)
+ *
+ * This was a rewrite of just about everything from hd.c, though some original
+ * code is still sprinkled about. Think of it as a major evolution, with
+ * inspiration from lots of linux users, esp. hamish@zot.apana.org.au
+ *
+ * Version 1.0 ALPHA initial code, primary i/f working okay
+ * Version 1.3 BETA dual i/f on shared irq tested & working!
+ * Version 1.4 BETA added auto probing for irq(s)
+ * Version 1.5 BETA added ALPHA (untested) support for IDE cd-roms,
+ * ...
+ * Version 5.50 allow values as small as 20 for idebus=
+ * Version 5.51 force non io_32bit in drive_cmd_intr()
+ * change delay_10ms() to delay_50ms() to fix problems
+ * Version 5.52 fix incorrect invalidation of removable devices
+ * add "hdx=slow" command line option
+ * Version 5.60 start to modularize the driver; the disk and ATAPI
+ * drivers can be compiled as loadable modules.
+ * move IDE probe code to ide-probe.c
+ * move IDE disk code to ide-disk.c
+ * add support for generic IDE device subdrivers
+ * add m68k code from Geert Uytterhoeven
+ * probe all interfaces by default
+ * add ioctl to (re)probe an interface
+ * Version 6.00 use per device request queues
+ * attempt to optimize shared hwgroup performance
+ * add ioctl to manually adjust bandwidth algorithms
+ * add kerneld support for the probe module
+ * fix bug in ide_error()
+ * fix bug in the first ide_get_lock() call for Atari
+ * don't flush leftover data for ATAPI devices
+ * Version 6.01 clear hwgroup->active while the hwgroup sleeps
+ * support HDIO_GETGEO for floppies
+ * Version 6.02 fix ide_ack_intr() call
+ * check partition table on floppies
+ * Version 6.03 handle bad status bit sequencing in ide_wait_stat()
+ * Version 6.10 deleted old entries from this list of updates
+ * replaced triton.c with ide-dma.c generic PCI DMA
+ * added support for BIOS-enabled UltraDMA
+ * rename all "promise" things to "pdc4030"
+ * fix EZ-DRIVE handling on small disks
+ * Version 6.11 fix probe error in ide_scan_devices()
+ * fix ancient "jiffies" polling bugs
+ * mask all hwgroup interrupts on each irq entry
+ * Version 6.12 integrate ioctl and proc interfaces
+ * fix parsing of "idex=" command line parameter
+ * Version 6.13 add support for ide4/ide5 courtesy rjones@orchestream.com
+ * Version 6.14 fixed IRQ sharing among PCI devices
+ * Version 6.15 added SMP awareness to IDE drivers
+ * Version 6.16 fixed various bugs; even more SMP friendly
+ * Version 6.17 fix for newest EZ-Drive problem
+ * Version 6.18 default unpartitioned-disk translation now "BIOS LBA"
+ * Version 6.19 Re-design for a UNIFORM driver for all platforms,
+ * model based on suggestions from Russell King and
+ * Geert Uytterhoeven
+ * Promise DC4030VL now supported.
+ * add support for ide6/ide7
+ * delay_50ms() changed to ide_delay_50ms() and exported.
+ * Version 6.20 Added/Fixed Generic ATA-66 support and hwif detection.
+ * Added hdx=flash to allow for second flash disk
+ * detection w/o the hang loop.
+ * Added support for ide8/ide9
+ * Added idex=ata66 for the quirky chipsets that are
+ * ATA-66 compliant, but have yet to determine a method
+ * of verification of the 80c cable presence.
+ * Specifically Promise's PDC20262 chipset.
+ * Version 6.21 Fixing/Fixed SMP spinlock issue with insight from an old
+ * hat that clarified original low level driver design.
+ * Version 6.30 Added SMP support; fixed multmode issues. -ml
+ * Version 6.31 Debug Share INTR's and request queue streaming
+ * Native ATA-100 support
+ * Prep for Cascades Project
+ *
+ * Some additional driver compile-time options are in ./include/linux/ide.h
+ *
+ * To do, in likely order of completion:
+ * - modify kernel to obtain BIOS geometry for drives on 2nd/3rd/4th i/f
+ *
+ */
+
+#define REVISION "Revision: 6.31"
+#define VERSION "Id: ide.c 6.31 2000/06/09"
+
+#undef REALLY_SLOW_IO /* most systems can safely undef this */
+
+#define _IDE_C /* Tell ide.h it's really us */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+/*#include <xeno/kernel.h>*/
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/blkpg.h>
+#include <xeno/slab.h>
+#include <xeno/init.h>
+#include <xeno/pci.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+/*#include <xeno/devfs_fs_kernel.h>*/
+/*#include <xeno/completion.h>*/
+/*#include <xeno/reboot.h>*/
+
+#include <asm/domain_page.h>
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+#include "ide_modes.h"
+
+#ifdef CONFIG_KMOD
+#include <xeno/kmod.h>
+#endif /* CONFIG_KMOD */
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+# define __TASKFILE__IO
+#else /* CONFIG_IDE_TASKFILE_IO */
+# undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+#ifdef __TASKFILE__IO
+#else /* !__TASKFILE__IO */
+#endif /* __TASKFILE__IO */
+
+
+
+
+/* XXXXXXXXXXXX This may be replaced by fs/block_dev.c versions!!! XXXXX */
+/* (only included here so the hypervisor will link :-) */
+int check_disk_change(kdev_t dev) { return 0; }
+int unregister_blkdev(unsigned int major, const char * name) { return 0; }
+/* And these ones are from fs/inode.c... */
+int invalidate_device(kdev_t dev, int do_sync) { return 0; }
+/* fs/buffer.c... */
+void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) { }
+/* fs/partitions/check.c... */
+void grok_partitions(struct gendisk *dev, int drive,
+ unsigned minors, long size) { }
+void register_disk(struct gendisk *dev, kdev_t first,
+ unsigned minors, struct block_device_operations *ops,
+ long size) { }
+/* fs/devices.c... */
+const char * kdevname(kdev_t dev) { return NULL; }
+/* End of XXXXXX region */
+
+
+
+
+/* default maximum number of failures */
+#define IDE_DEFAULT_MAX_FAILURES 1
+
+static const byte ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR, IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR };
+
+static int idebus_parameter; /* holds the "idebus=" parameter */
+static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */
+static int initializing; /* set while initializing built-in drivers */
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+/*
+ * ide_lock is used by the Atari code to obtain access to the IDE interrupt,
+ * which is shared between several drivers.
+ */
+static int ide_lock;
+#endif /* __mc68000__ || CONFIG_APUS */
+
+int noautodma = 0;
+
+/*
+ * ide_modules keeps track of the available IDE chipset/probe/driver modules.
+ */
+ide_module_t *ide_modules;
+ide_module_t *ide_probe;
+
+/*
+ * This is declared extern in ide.h, for access by other IDE modules:
+ */
+ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */
+
+#if (DISK_RECOVERY_TIME > 0)
+/*
+ * For really screwy hardware (hey, at least it *can* be used with Linux)
+ * we can enforce a minimum delay time between successive operations.
+ */
+static unsigned long read_timer (void)
+{
+ unsigned long t, flags;
+ int i;
+
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ t = jiffies * 11932;
+ outb_p(0, 0x43);
+ i = inb_p(0x40);
+ i |= inb(0x40) << 8;
+ __restore_flags(flags); /* local CPU only */
+ return (t - i);
+}
+#endif /* DISK_RECOVERY_TIME */
+
+static inline void set_recovery_timer (ide_hwif_t *hwif)
+{
+#if (DISK_RECOVERY_TIME > 0)
+ hwif->last_time = read_timer();
+#endif /* DISK_RECOVERY_TIME */
+}
+
+/*
+ * Do not even *think* about calling this!
+ */
+static void init_hwif_data (unsigned int index)
+{
+ unsigned int unit;
+ hw_regs_t hw;
+ ide_hwif_t *hwif = &ide_hwifs[index];
+
+ /* bulk initialize hwif & drive info with zeros */
+ memset(hwif, 0, sizeof(ide_hwif_t));
+ memset(&hw, 0, sizeof(hw_regs_t));
+
+ /* fill in any non-zero initial values */
+ hwif->index = index;
+ ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, &hwif->irq);
+ memcpy(&hwif->hw, &hw, sizeof(hw));
+ memcpy(hwif->io_ports, hw.io_ports, sizeof(hw.io_ports));
+ hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
+#ifdef CONFIG_BLK_DEV_HD
+ if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA)
+ hwif->noprobe = 1; /* may be overridden by ide_setup() */
+#endif /* CONFIG_BLK_DEV_HD */
+ hwif->major = ide_hwif_to_major[index];
+ hwif->name[0] = 'i';
+ hwif->name[1] = 'd';
+ hwif->name[2] = 'e';
+ hwif->name[3] = '0' + index;
+ hwif->bus_state = BUSSTATE_ON;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+
+ drive->media = ide_disk;
+ drive->select.all = (unit<<4)|0xa0;
+ drive->hwif = hwif;
+ drive->ctl = 0x08;
+ drive->ready_stat = READY_STAT;
+ drive->bad_wstat = BAD_W_STAT;
+ drive->special.b.recalibrate = 1;
+ drive->special.b.set_geometry = 1;
+ drive->name[0] = 'h';
+ drive->name[1] = 'd';
+ drive->name[2] = 'a' + (index * MAX_DRIVES) + unit;
+ drive->max_failures = IDE_DEFAULT_MAX_FAILURES;
+ /*init_waitqueue_head(&drive->wqueue);*/
+ }
+}
+
+/*
+ * init_ide_data() sets reasonable default values into all fields
+ * of all instances of the hwifs and drives, but only on the first call.
+ * Subsequent calls have no effect (they don't wipe out anything).
+ *
+ * This routine is normally called at driver initialization time,
+ * but may also be called MUCH earlier during kernel "command-line"
+ * parameter processing. As such, we cannot depend on any other parts
+ * of the kernel (such as memory allocation) to be functioning yet.
+ *
+ * This is too bad, as otherwise we could dynamically allocate the
+ * ide_drive_t structs as needed, rather than always consuming memory
+ * for the max possible number (MAX_HWIFS * MAX_DRIVES) of them.
+ */
+#define MAGIC_COOKIE 0x12345678
+static void __init init_ide_data (void)
+{
+ unsigned int index;
+ static unsigned long magic_cookie = MAGIC_COOKIE;
+
+ if (magic_cookie != MAGIC_COOKIE)
+ return; /* already initialized */
+ magic_cookie = 0;
+
+ /* Initialise all interface structures */
+ for (index = 0; index < MAX_HWIFS; ++index)
+ init_hwif_data(index);
+
+ /* Add default hw interfaces */
+ ide_init_default_hwifs();
+
+ idebus_parameter = 0;
+ system_bus_speed = 0;
+}
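+
+/*
+ * Illustrative only: thanks to the magic-cookie guard above, repeated calls
+ * are harmless no-ops, so both of these hypothetical call sites are safe:
+ *
+ *	init_ide_data();	(early, from "command-line" parameter parsing)
+ *	init_ide_data();	(again later, from driver initialization)
+ */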
+
+/*
+ * CompactFlash cards and their brethren pretend to be removable hard disks, except:
+ * (1) they never have a slave unit, and
+ * (2) they don't have doorlock mechanisms.
+ * This test catches them, and is invoked elsewhere when setting appropriate config bits.
+ *
+ * FIXME: This treatment is probably applicable for *all* PCMCIA (PC CARD) devices,
+ * so in linux 2.3.x we should change this to just treat all PCMCIA drives this way,
+ * and get rid of the model-name tests below (too big of an interface change for 2.2.x).
+ * At that time, we might also consider parameterizing the timeouts and retries,
+ * since these are MUCH faster than mechanical drives. -M.Lord
+ */
+int drive_is_flashcard (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+
+ if (drive->removable && id != NULL) {
+ if (id->config == 0x848a) return 1; /* CompactFlash */
+ if (!strncmp(id->model, "KODAK ATA_FLASH", 15) /* Kodak */
+ || !strncmp(id->model, "Hitachi CV", 10) /* Hitachi */
+ || !strncmp(id->model, "SunDisk SDCFB", 13) /* SunDisk */
+ || !strncmp(id->model, "HAGIWARA HPC", 12) /* Hagiwara */
+ || !strncmp(id->model, "LEXAR ATA_FLASH", 15) /* Lexar */
+ || !strncmp(id->model, "ATA_FLASH", 9)) /* Simple Tech */
+ {
+ return 1; /* yes, it is a flash memory card */
+ }
+ }
+ return 0; /* no, it is not a flash memory card */
+}
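+
+/*
+ * A minimal usage sketch (hypothetical caller -- the real call sites live in
+ * the probe/config code): a positive answer means no slave unit and no
+ * doorlock, so a caller would typically do something like:
+ *
+ *	if (drive_is_flashcard(drive))
+ *		(treat as removable, skip doorlock commands and slave probing)
+ */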
+
+/*
+ * ide_system_bus_speed() returns what we think is the system VESA/PCI
+ * bus speed (in MHz). This is used for calculating interface PIO timings.
+ * The default is 33 for known PCI systems, 50 otherwise.
+ * The "idebus=xx" parameter can be used to override this value.
+ * The actual value to be used is computed/displayed the first time through.
+ */
+int ide_system_bus_speed (void)
+{
+ if (!system_bus_speed) {
+ if (idebus_parameter)
+ system_bus_speed = idebus_parameter; /* user supplied value */
+#ifdef CONFIG_PCI
+ else if (pci_present())
+ system_bus_speed = 33; /* safe default value for PCI */
+#endif /* CONFIG_PCI */
+ else
+ system_bus_speed = 50; /* safe default value for VESA and PCI */
+ printk("ide: Assuming %dMHz system bus speed for PIO modes%s\n", system_bus_speed,
+ idebus_parameter ? "" : "; override with idebus=xx");
+ }
+ return system_bus_speed;
+}
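+
+/*
+ * For illustration only (no such line appears verbatim in this driver): the
+ * PIO tuning code turns this MHz figure into a bus clock period,
+ *
+ *	cycle_ns = 1000 / ide_system_bus_speed();	(33MHz -> ~30ns)
+ *
+ * which is then scaled up to satisfy the cycle times each PIO mode requires.
+ */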
+
+#if SUPPORT_VLB_SYNC
+/*
+ * Some localbus EIDE interfaces require a special access sequence
+ * when using 32-bit I/O instructions to transfer data. We call this
+ * the "vlb_sync" sequence, which consists of three successive reads
+ * of the sector count register location, with interrupts disabled
+ * to ensure that the reads all happen together.
+ */
+static inline void do_vlb_sync (ide_ioreg_t port) {
+ (void) inb (port);
+ (void) inb (port);
+ (void) inb (port);
+}
+#endif /* SUPPORT_VLB_SYNC */
+
+/*
+ * This is used for most PIO data transfers *from* the IDE interface
+ */
+void ide_input_data (ide_drive_t *drive, void *vbuffer, unsigned int wcount)
+{
+ void *buffer;
+ byte io_32bit;
+
+ /* first check if this controller has defined a special function
+ * for handling polled ide transfers
+ */
+
+ if(HWIF(drive)->ideproc) {
+ HWIF(drive)->ideproc(ideproc_ide_input_data,
+ drive, vbuffer, wcount);
+ return;
+ }
+
+	/* We assume a controller's own functions will make their own
+	 * arrangements for mapping/unmapping the destination memory if
+	 * required (and none are needed if using DMA)
+	 */
+
+ buffer = map_domain_mem(virt_to_phys(vbuffer));
+
+ io_32bit = drive->io_32bit;
+
+ if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+ if (io_32bit & 2) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ do_vlb_sync(IDE_NSECTOR_REG);
+ insl(IDE_DATA_REG, buffer, wcount);
+ __restore_flags(flags); /* local CPU only */
+ } else
+#endif /* SUPPORT_VLB_SYNC */
+ insl(IDE_DATA_REG, buffer, wcount);
+ } else {
+#if SUPPORT_SLOW_DATA_PORTS
+ if (drive->slow) {
+ unsigned short *ptr = (unsigned short *) buffer;
+ while (wcount--) {
+ *ptr++ = inw_p(IDE_DATA_REG);
+ *ptr++ = inw_p(IDE_DATA_REG);
+ }
+ } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+ insw(IDE_DATA_REG, buffer, wcount<<1);
+ }
+
+ unmap_domain_mem(buffer);
+}
+
+/*
+ * This is used for most PIO data transfers *to* the IDE interface
+ */
+void ide_output_data (ide_drive_t *drive, void *vbuffer, unsigned int wcount)
+{
+ void *buffer;
+ byte io_32bit;
+
+ if(HWIF(drive)->ideproc) {
+ HWIF(drive)->ideproc(ideproc_ide_output_data,
+ drive, vbuffer, wcount);
+ return;
+ }
+
+ buffer = map_domain_mem(virt_to_phys(vbuffer));
+
+ io_32bit = drive->io_32bit;
+
+ if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+ if (io_32bit & 2) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ do_vlb_sync(IDE_NSECTOR_REG);
+ outsl(IDE_DATA_REG, buffer, wcount);
+ __restore_flags(flags); /* local CPU only */
+ } else
+#endif /* SUPPORT_VLB_SYNC */
+ outsl(IDE_DATA_REG, buffer, wcount);
+ } else {
+#if SUPPORT_SLOW_DATA_PORTS
+ if (drive->slow) {
+ unsigned short *ptr = (unsigned short *) buffer;
+ while (wcount--) {
+ outw_p(*ptr++, IDE_DATA_REG);
+ outw_p(*ptr++, IDE_DATA_REG);
+ }
+ } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+ outsw(IDE_DATA_REG, buffer, wcount<<1);
+ }
+
+ unmap_domain_mem(buffer);
+}
+
+/*
+ * The following routines are mainly used by the ATAPI drivers.
+ *
+ * These routines will round up any request for an odd number of bytes,
+ * so if an odd bytecount is specified, be sure that there's at least one
+ * extra byte allocated for the buffer.
+ */
+void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount)
+{
+ if(HWIF(drive)->ideproc) {
+ HWIF(drive)->ideproc(ideproc_atapi_input_bytes,
+ drive, buffer, bytecount);
+ return;
+ }
+printk("XXXXX atapi_input_bytes called -- mapping is likely broken\n");
+ ++bytecount;
+#if defined(CONFIG_ATARI) || defined(CONFIG_Q40)
+ if (MACH_IS_ATARI || MACH_IS_Q40) {
+ /* Atari has a byte-swapped IDE interface */
+ insw_swapw(IDE_DATA_REG, buffer, bytecount / 2);
+ return;
+ }
+#endif /* CONFIG_ATARI */
+ ide_input_data (drive, buffer, bytecount / 4);
+ if ((bytecount & 0x03) >= 2)
+ insw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1);
+}
+
+void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount)
+{
+ if(HWIF(drive)->ideproc) {
+ HWIF(drive)->ideproc(ideproc_atapi_output_bytes,
+ drive, buffer, bytecount);
+ return;
+ }
+
+printk("XXXXX atapi_output_bytes called -- mapping is likely broken\n");
+
+ ++bytecount;
+#if defined(CONFIG_ATARI) || defined(CONFIG_Q40)
+ if (MACH_IS_ATARI || MACH_IS_Q40) {
+ /* Atari has a byte-swapped IDE interface */
+ outsw_swapw(IDE_DATA_REG, buffer, bytecount / 2);
+ return;
+ }
+#endif /* CONFIG_ATARI */
+ ide_output_data (drive, buffer, bytecount / 4);
+ if ((bytecount & 0x03) >= 2)
+ outsw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1);
+}
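+
+/*
+ * A sketch of the buffer rule stated above (LEN here is illustrative): an odd
+ * bytecount is rounded up, so always leave one spare byte at the end:
+ *
+ *	byte buf[LEN + 1];			(one extra byte for the round-up)
+ *	atapi_input_bytes(drive, buf, LEN);	(may transfer LEN+1 bytes if LEN is odd)
+ */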
+
+/*
+ * Needed for PCI irq sharing
+ */
+//static inline
+int drive_is_ready (ide_drive_t *drive)
+{
+ byte stat = 0;
+ if (drive->waiting_for_dma)
+ return HWIF(drive)->dmaproc(ide_dma_test_irq, drive);
+#if 0
+ udelay(1); /* need to guarantee 400ns since last command was issued */
+#endif
+
+#ifdef CONFIG_IDEPCI_SHARE_IRQ
+ /*
+ * We do a passive status test under shared PCI interrupts on
+ * cards that truly share the ATA side interrupt, but may also share
+ * an interrupt with another pci card/device. We make no assumptions
+ * about possible isa-pnp and pci-pnp issues yet.
+ */
+ if (IDE_CONTROL_REG)
+ stat = GET_ALTSTAT();
+ else
+#endif /* CONFIG_IDEPCI_SHARE_IRQ */
+ stat = GET_STAT(); /* Note: this may clear a pending IRQ!! */
+
+ if (stat & BUSY_STAT)
+ return 0; /* drive busy: definitely not interrupting */
+ return 1; /* drive ready: *might* be interrupting */
+}
+
+/*
+ * This is our end_request replacement function.
+ */
+void ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
+{
+ struct request *rq;
+ unsigned long flags;
+ ide_drive_t *drive = hwgroup->drive;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rq = hwgroup->rq;
+
+ /*
+	 * decide whether to re-enable DMA -- 3 is an arbitrary cutoff for now;
+	 * if we take more than 3 DMA timeouts, just stay in PIO
+ */
+ if (drive->state == DMA_PIO_RETRY && drive->retry_pio <= 3) {
+ drive->state = 0;
+ hwgroup->hwif->dmaproc(ide_dma_on, drive);
+ }
+
+ if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) {
+ add_blkdev_randomness(MAJOR(rq->rq_dev));
+ blkdev_dequeue_request(rq);
+ hwgroup->rq = NULL;
+ end_that_request_last(rq);
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * This should get invoked any time we exit the driver to
+ * wait for an interrupt response from a drive. handler() points
+ * at the appropriate code to handle the next interrupt, and a
+ * timer is started to prevent us from waiting forever in case
+ * something goes wrong (see the ide_timer_expiry() handler later on).
+ */
+void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
+ unsigned int timeout, ide_expiry_t *expiry)
+{
+ unsigned long flags;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (hwgroup->handler != NULL) {
+ printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n",
+ drive->name, hwgroup->handler, handler);
+ }
+ hwgroup->handler = handler;
+ hwgroup->expiry = expiry;
+ hwgroup->timer.expires = jiffies + timeout;
+ add_timer(&hwgroup->timer);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
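+
+/*
+ * Typical usage pattern (compare ide_cmd() further down): install the
+ * interrupt handler *before* poking the command register, so the IRQ can
+ * never beat us to it.  my_intr below is illustrative; real handlers are
+ * e.g. drive_cmd_intr():
+ *
+ *	ide_set_handler(drive, &my_intr, WAIT_CMD, NULL);
+ *	OUT_BYTE(cmd, IDE_COMMAND_REG);
+ */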
+
+/*
+ * current_capacity() returns the capacity (in sectors) of a drive
+ * according to its current geometry/LBA settings.
+ */
+unsigned long current_capacity (ide_drive_t *drive)
+{
+ if (!drive->present)
+ return 0;
+ if (drive->driver != NULL)
+ return DRIVER(drive)->capacity(drive);
+ return 0;
+}
+
+extern struct block_device_operations ide_fops[];
+/*
+ * ide_geninit() is called exactly *once* for each interface.
+ */
+void ide_geninit (ide_hwif_t *hwif)
+{
+ unsigned int unit;
+ struct gendisk *gd = hwif->gd;
+
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+
+ if (!drive->present)
+ continue;
+ if (drive->media!=ide_disk && drive->media!=ide_floppy)
+ continue;
+ register_disk(gd,MKDEV(hwif->major,unit<<PARTN_BITS),
+#ifdef CONFIG_BLK_DEV_ISAPNP
+ (drive->forced_geom && drive->noprobe) ? 1 :
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+ 1<<PARTN_BITS, ide_fops,
+ current_capacity(drive));
+ }
+}
+
+static ide_startstop_t do_reset1 (ide_drive_t *, int); /* needed below */
+
+/*
+ * atapi_reset_pollfunc() gets invoked to poll the interface for completion every 50ms
+ * during an atapi drive reset operation. If the drive has not yet responded,
+ * and we have not yet hit our maximum waiting time, then the timer is restarted
+ * for another 50ms.
+ */
+static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ byte stat;
+
+ SELECT_DRIVE(HWIF(drive),drive);
+ udelay (10);
+
+ if (OK_STAT(stat=GET_STAT(), 0, BUSY_STAT)) {
+ printk("%s: ATAPI reset complete\n", drive->name);
+ } else {
+ if (0 < (signed long)(hwgroup->poll_timeout - jiffies)) {
+ ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20, NULL);
+ return ide_started; /* continue polling */
+ }
+ hwgroup->poll_timeout = 0; /* end of polling */
+ printk("%s: ATAPI reset timed-out, status=0x%02x\n", drive->name, stat);
+ return do_reset1 (drive, 1); /* do it the old fashioned way */
+ }
+ hwgroup->poll_timeout = 0; /* done polling */
+ return ide_stopped;
+}
+
+/*
+ * reset_pollfunc() gets invoked to poll the interface for completion every 50ms
+ * during an ide reset operation. If the drives have not yet responded,
+ * and we have not yet hit our maximum waiting time, then the timer is restarted
+ * for another 50ms.
+ */
+static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ ide_hwif_t *hwif = HWIF(drive);
+ byte tmp;
+
+ if (!OK_STAT(tmp=GET_STAT(), 0, BUSY_STAT)) {
+ if (0 < (signed long)(hwgroup->poll_timeout - jiffies)) {
+ ide_set_handler (drive, &reset_pollfunc, HZ/20, NULL);
+ return ide_started; /* continue polling */
+ }
+ printk("%s: reset timed-out, status=0x%02x\n", hwif->name, tmp);
+ drive->failures++;
+ } else {
+ printk("%s: reset: ", hwif->name);
+ if ((tmp = GET_ERR()) == 1) {
+ printk("success\n");
+ drive->failures = 0;
+ } else {
+ drive->failures++;
+#if FANCY_STATUS_DUMPS
+ printk("master: ");
+ switch (tmp & 0x7f) {
+ case 1: printk("passed");
+ break;
+ case 2: printk("formatter device error");
+ break;
+ case 3: printk("sector buffer error");
+ break;
+ case 4: printk("ECC circuitry error");
+ break;
+ case 5: printk("controlling MPU error");
+ break;
+ default:printk("error (0x%02x?)", tmp);
+ }
+ if (tmp & 0x80)
+ printk("; slave: failed");
+ printk("\n");
+#else
+ printk("failed\n");
+#endif /* FANCY_STATUS_DUMPS */
+ }
+ }
+ hwgroup->poll_timeout = 0; /* done polling */
+ return ide_stopped;
+}
+
+static void check_dma_crc (ide_drive_t *drive)
+{
+ if (drive->crc_count) {
+ (void) HWIF(drive)->dmaproc(ide_dma_off_quietly, drive);
+ if ((HWIF(drive)->speedproc) != NULL)
+ HWIF(drive)->speedproc(drive, ide_auto_reduce_xfer(drive));
+ if (drive->current_speed >= XFER_SW_DMA_0)
+ (void) HWIF(drive)->dmaproc(ide_dma_on, drive);
+ } else {
+ (void) HWIF(drive)->dmaproc(ide_dma_off, drive);
+ }
+}
+
+static void pre_reset (ide_drive_t *drive)
+{
+ if (drive->driver != NULL)
+ DRIVER(drive)->pre_reset(drive);
+
+ if (!drive->keep_settings) {
+ if (drive->using_dma) {
+ check_dma_crc(drive);
+ } else {
+ drive->unmask = 0;
+ drive->io_32bit = 0;
+ }
+ return;
+ }
+ if (drive->using_dma)
+ check_dma_crc(drive);
+}
+
+/*
+ * do_reset1() attempts to recover a confused drive by resetting it.
+ * Unfortunately, resetting a disk drive actually resets all devices on
+ * the same interface, so it can really be thought of as resetting the
+ * interface rather than resetting the drive.
+ *
+ * ATAPI devices have their own reset mechanism which allows them to be
+ * individually reset without clobbering other devices on the same interface.
+ *
+ * Unfortunately, the IDE interface does not generate an interrupt to let
+ * us know when the reset operation has finished, so we must poll for this.
+ * Equally poor, though, is the fact that this may take a very long time to
+ * complete (up to 30 seconds worst-case).  So, instead of busy-waiting here,
+ * we set a timer to poll at 50ms intervals.
+ */
+static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
+{
+ unsigned int unit;
+ unsigned long flags;
+ ide_hwif_t *hwif = HWIF(drive);
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+
+ /* For an ATAPI device, first try an ATAPI SRST. */
+ if (drive->media != ide_disk && !do_not_try_atapi) {
+ pre_reset(drive);
+ SELECT_DRIVE(hwif,drive);
+ udelay (20);
+ OUT_BYTE (WIN_SRST, IDE_COMMAND_REG);
+ hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
+ ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20, NULL);
+ __restore_flags (flags); /* local CPU only */
+ return ide_started;
+ }
+
+ /*
+ * First, reset any device state data we were maintaining
+ * for any of the drives on this interface.
+ */
+ for (unit = 0; unit < MAX_DRIVES; ++unit)
+ pre_reset(&hwif->drives[unit]);
+
+#if OK_TO_RESET_CONTROLLER
+ if (!IDE_CONTROL_REG) {
+ __restore_flags(flags);
+ return ide_stopped;
+ }
+ /*
+ * Note that we also set nIEN while resetting the device,
+ * to mask unwanted interrupts from the interface during the reset.
+ * However, due to the design of PC hardware, this will cause an
+ * immediate interrupt due to the edge transition it produces.
+ * This single interrupt gives us a "fast poll" for drives that
+ * recover from reset very quickly, saving us the first 50ms wait time.
+ */
+ OUT_BYTE(drive->ctl|6,IDE_CONTROL_REG); /* set SRST and nIEN */
+ udelay(10); /* more than enough time */
+ if (drive->quirk_list == 2) {
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* clear SRST and nIEN */
+ } else {
+ OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* clear SRST, leave nIEN */
+ }
+ udelay(10); /* more than enough time */
+ hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
+ ide_set_handler (drive, &reset_pollfunc, HZ/20, NULL);
+
+ /*
+	 * Some weird controllers like to reset themselves to a strange
+	 * state when the disks are reset this way. At least, the Winbond
+	 * 553 documentation says so.
+ */
+ if (hwif->resetproc != NULL)
+ hwif->resetproc(drive);
+
+#endif /* OK_TO_RESET_CONTROLLER */
+
+ __restore_flags (flags); /* local CPU only */
+ return ide_started;
+}
+
+/*
+ * ide_do_reset() is the entry point to the drive/interface reset code.
+ */
+ide_startstop_t ide_do_reset (ide_drive_t *drive)
+{
+ return do_reset1 (drive, 0);
+}
+
+static inline u32 read_24 (ide_drive_t *drive)
+{
+ return (IN_BYTE(IDE_HCYL_REG)<<16) |
+ (IN_BYTE(IDE_LCYL_REG)<<8) |
+ IN_BYTE(IDE_SECTOR_REG);
+}
+
+/*
+ * Clean up after success/failure of an explicit drive cmd
+ */
+void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err)
+{
+ unsigned long flags;
+ struct request *rq;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rq = HWGROUP(drive)->rq;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ switch(rq->cmd) {
+ case IDE_DRIVE_CMD:
+ {
+ byte *args = (byte *) rq->buffer;
+ rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+ if (args) {
+ args[0] = stat;
+ args[1] = err;
+ args[2] = IN_BYTE(IDE_NSECTOR_REG);
+ }
+ break;
+ }
+ case IDE_DRIVE_TASK:
+ {
+ byte *args = (byte *) rq->buffer;
+ rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+ if (args) {
+ args[0] = stat;
+ args[1] = err;
+ args[2] = IN_BYTE(IDE_NSECTOR_REG);
+ args[3] = IN_BYTE(IDE_SECTOR_REG);
+ args[4] = IN_BYTE(IDE_LCYL_REG);
+ args[5] = IN_BYTE(IDE_HCYL_REG);
+ args[6] = IN_BYTE(IDE_SELECT_REG);
+ }
+ break;
+ }
+ case IDE_DRIVE_TASKFILE:
+ {
+ ide_task_t *args = (ide_task_t *) rq->special;
+ rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+ if (args) {
+ if (args->tf_in_flags.b.data) {
+ unsigned short data = IN_WORD(IDE_DATA_REG);
+ args->tfRegister[IDE_DATA_OFFSET] = (data) & 0xFF;
+ args->hobRegister[IDE_DATA_OFFSET_HOB] = (data >> 8) & 0xFF;
+ }
+ args->tfRegister[IDE_ERROR_OFFSET] = err;
+ args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG);
+ args->tfRegister[IDE_SECTOR_OFFSET] = IN_BYTE(IDE_SECTOR_REG);
+ args->tfRegister[IDE_LCYL_OFFSET] = IN_BYTE(IDE_LCYL_REG);
+ args->tfRegister[IDE_HCYL_OFFSET] = IN_BYTE(IDE_HCYL_REG);
+ args->tfRegister[IDE_SELECT_OFFSET] = IN_BYTE(IDE_SELECT_REG);
+ args->tfRegister[IDE_STATUS_OFFSET] = stat;
+
+ if ((drive->id->command_set_2 & 0x0400) &&
+ (drive->id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB);
+ args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG);
+ args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG);
+ args->hobRegister[IDE_SECTOR_OFFSET_HOB] = IN_BYTE(IDE_SECTOR_REG);
+ args->hobRegister[IDE_LCYL_OFFSET_HOB] = IN_BYTE(IDE_LCYL_REG);
+ args->hobRegister[IDE_HCYL_OFFSET_HOB] = IN_BYTE(IDE_HCYL_REG);
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ spin_lock_irqsave(&io_request_lock, flags);
+ blkdev_dequeue_request(rq);
+ HWGROUP(drive)->rq = NULL;
+ end_that_request_last(rq);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Error reporting, in human readable form (luxurious, but a memory hog).
+ */
+byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat)
+{
+ unsigned long flags;
+ byte err = 0;
+
+ __save_flags (flags); /* local CPU only */
+ ide__sti(); /* local CPU only */
+ printk("%s: %s: status=0x%02x", drive->name, msg, stat);
+#if FANCY_STATUS_DUMPS
+ printk(" { ");
+ if (stat & BUSY_STAT)
+ printk("Busy ");
+ else {
+ if (stat & READY_STAT) printk("DriveReady ");
+ if (stat & WRERR_STAT) printk("DeviceFault ");
+ if (stat & SEEK_STAT) printk("SeekComplete ");
+ if (stat & DRQ_STAT) printk("DataRequest ");
+ if (stat & ECC_STAT) printk("CorrectedError ");
+ if (stat & INDEX_STAT) printk("Index ");
+ if (stat & ERR_STAT) printk("Error ");
+ }
+ printk("}");
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) {
+ err = GET_ERR();
+ printk("%s: %s: error=0x%02x", drive->name, msg, err);
+#if FANCY_STATUS_DUMPS
+ if (drive->media == ide_disk) {
+ printk(" { ");
+ if (err & ABRT_ERR) printk("DriveStatusError ");
+ if (err & ICRC_ERR) printk("%s", (err & ABRT_ERR) ? "BadCRC " : "BadSector ");
+ if (err & ECC_ERR) printk("UncorrectableError ");
+ if (err & ID_ERR) printk("SectorIdNotFound ");
+ if (err & TRK0_ERR) printk("TrackZeroNotFound ");
+ if (err & MARK_ERR) printk("AddrMarkNotFound ");
+ printk("}");
+ if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) {
+ if ((drive->id->command_set_2 & 0x0400) &&
+ (drive->id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ __u64 sectors = 0;
+ u32 low = 0, high = 0;
+ low = read_24(drive);
+ OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG);
+ high = read_24(drive);
+
+ sectors = ((__u64)high << 24) | low;
+ printk(", LBAsect=%llu, high=%d, low=%d",
+ (unsigned long long) sectors,
+ high, low);
+ } else {
+ byte cur = IN_BYTE(IDE_SELECT_REG);
+ if (cur & 0x40) { /* using LBA? */
+ printk(", LBAsect=%ld", (unsigned long)
+ ((cur&0xf)<<24)
+ |(IN_BYTE(IDE_HCYL_REG)<<16)
+ |(IN_BYTE(IDE_LCYL_REG)<<8)
+ | IN_BYTE(IDE_SECTOR_REG));
+ } else {
+ printk(", CHS=%d/%d/%d",
+ (IN_BYTE(IDE_HCYL_REG)<<8) +
+ IN_BYTE(IDE_LCYL_REG),
+ cur & 0xf,
+ IN_BYTE(IDE_SECTOR_REG));
+ }
+ }
+ if (HWGROUP(drive) && HWGROUP(drive)->rq)
+ printk(", sector=%ld", HWGROUP(drive)->rq->sector);
+ }
+ }
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ }
+ __restore_flags (flags); /* local CPU only */
+ return err;
+}
+
+/*
+ * try_to_flush_leftover_data() is invoked in response to a drive
+ * unexpectedly having its DRQ_STAT bit set. As an alternative to
+ * resetting the drive, this routine tries to clear the condition
+ * by reading a sector's worth of data from the drive.  Of course,
+ * this may not help if the drive is *waiting* for data from *us*.
+ */
+static void try_to_flush_leftover_data (ide_drive_t *drive)
+{
+ int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS;
+
+ if (drive->media != ide_disk)
+ return;
+ while (i > 0) {
+ u32 buffer[16];
+ unsigned int wcount = (i > 16) ? 16 : i;
+ i -= wcount;
+ ide_input_data (drive, buffer, wcount);
+ }
+}
+
+/*
+ * ide_error() takes action based on the error returned by the drive.
+ */
+ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat)
+{
+ struct request *rq;
+ byte err;
+
+ err = ide_dump_status(drive, msg, stat);
+ if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
+ return ide_stopped;
+ /* retry only "normal" I/O: */
+ if (rq->cmd == IDE_DRIVE_CMD || rq->cmd == IDE_DRIVE_TASK) {
+ rq->errors = 1;
+ ide_end_drive_cmd(drive, stat, err);
+ return ide_stopped;
+ }
+ if (rq->cmd == IDE_DRIVE_TASKFILE) {
+ rq->errors = 1;
+ ide_end_drive_cmd(drive, stat, err);
+// ide_end_taskfile(drive, stat, err);
+ return ide_stopped;
+ }
+
+ if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */
+ rq->errors |= ERROR_RESET;
+ } else {
+ if (drive->media == ide_disk && (stat & ERR_STAT)) {
+ /* err has different meaning on cdrom and tape */
+ if (err == ABRT_ERR) {
+ if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY)
+ return ide_stopped; /* some newer drives don't support WIN_SPECIFY */
+ } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) {
+ drive->crc_count++; /* UDMA crc error -- just retry the operation */
+ } else if (err & (BBD_ERR | ECC_ERR)) /* retries won't help these */
+ rq->errors = ERROR_MAX;
+ else if (err & TRK0_ERR) /* help it find track zero */
+ rq->errors |= ERROR_RECAL;
+ }
+ if ((stat & DRQ_STAT) && rq->cmd != WRITE)
+ try_to_flush_leftover_data(drive);
+ }
+ if (GET_STAT() & (BUSY_STAT|DRQ_STAT))
+ OUT_BYTE(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG); /* force an abort */
+
+ if (rq->errors >= ERROR_MAX) {
+ if (drive->driver != NULL)
+ DRIVER(drive)->end_request(0, HWGROUP(drive));
+ else
+ ide_end_request(0, HWGROUP(drive));
+ } else {
+ if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
+ ++rq->errors;
+ return ide_do_reset(drive);
+ }
+ if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+ drive->special.b.recalibrate = 1;
+ ++rq->errors;
+ }
+ return ide_stopped;
+}
+
+/*
+ * Issue a simple drive command
+ * The drive must be selected beforehand.
+ */
+void ide_cmd (ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler)
+{
+ ide_set_handler (drive, handler, WAIT_CMD, NULL);
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* clear nIEN */
+ SELECT_MASK(HWIF(drive),drive,0);
+ OUT_BYTE(nsect,IDE_NSECTOR_REG);
+ OUT_BYTE(cmd,IDE_COMMAND_REG);
+}
+
+/*
+ * drive_cmd_intr() is invoked on completion of a special DRIVE_CMD.
+ */
+static ide_startstop_t drive_cmd_intr (ide_drive_t *drive)
+{
+ struct request *rq = HWGROUP(drive)->rq;
+ byte *args = (byte *) rq->buffer;
+ byte stat = GET_STAT();
+ int retries = 10;
+
+ ide__sti(); /* local CPU only */
+ if ((stat & DRQ_STAT) && args && args[3]) {
+ byte io_32bit = drive->io_32bit;
+ drive->io_32bit = 0;
+ ide_input_data(drive, &args[4], args[3] * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ while (((stat = GET_STAT()) & BUSY_STAT) && retries--)
+ udelay(100);
+ }
+
+ if (!OK_STAT(stat, READY_STAT, BAD_STAT))
+ return ide_error(drive, "drive_cmd", stat); /* calls ide_end_drive_cmd */
+ ide_end_drive_cmd (drive, stat, GET_ERR());
+ return ide_stopped;
+}
+
+/*
+ * do_special() is used to issue WIN_SPECIFY, WIN_RESTORE, and WIN_SETMULT
+ * commands to a drive. It used to do much more, but has been scaled back.
+ */
+static ide_startstop_t do_special (ide_drive_t *drive)
+{
+ special_t *s = &drive->special;
+
+#ifdef DEBUG
+ printk("%s: do_special: 0x%02x\n", drive->name, s->all);
+#endif
+ if (s->b.set_tune) {
+ ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc;
+ s->b.set_tune = 0;
+ if (tuneproc != NULL)
+ tuneproc(drive, drive->tune_req);
+ } else if (drive->driver != NULL) {
+ return DRIVER(drive)->special(drive);
+ } else if (s->all) {
+ printk("%s: bad special flag: 0x%02x\n", drive->name, s->all);
+ s->all = 0;
+ }
+ return ide_stopped;
+}
+
+/*
+ * This routine busy-waits for the drive status to be not "busy".
+ * It then checks the status for all of the "good" bits and none
+ * of the "bad" bits, and if all is okay it returns 0. All other
+ * cases return 1 after invoking ide_error() -- caller should just return.
+ *
+ * This routine should get fixed to not hog the cpu during extra long waits..
+ * That could be done by busy-waiting for the first jiffy or two, and then
+ * setting a timer to wake up at half-second intervals thereafter,
+ * until the timeout expires.
+ */
+int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, byte good, byte bad, unsigned long timeout) {
+ byte stat;
+ int i;
+ unsigned long flags;
+
+ /* bail early if we've exceeded max_failures */
+ if (drive->max_failures && (drive->failures > drive->max_failures)) {
+ *startstop = ide_stopped;
+ return 1;
+ }
+
+ udelay(1); /* spec allows drive 400ns to assert "BUSY" */
+ if ((stat = GET_STAT()) & BUSY_STAT) {
+ __save_flags(flags); /* local CPU only */
+ ide__sti(); /* local CPU only */
+ timeout += jiffies;
+ while ((stat = GET_STAT()) & BUSY_STAT) {
+ if (0 < (signed long)(jiffies - timeout)) {
+ __restore_flags(flags); /* local CPU only */
+ *startstop = ide_error(drive, "status timeout", stat);
+ return 1;
+ }
+ }
+ __restore_flags(flags); /* local CPU only */
+ }
+ /*
+ * Allow status to settle, then read it again.
+ * A few rare drives vastly violate the 400ns spec here,
+ * so we'll wait up to 10usec for a "good" status
+ * rather than expensively fail things immediately.
+ * This fix courtesy of Matthew Faupel & Niccolo Rigacci.
+ */
+ for (i = 0; i < 10; i++) {
+ udelay(1);
+ if (OK_STAT((stat = GET_STAT()), good, bad))
+ return 0;
+ }
+ *startstop = ide_error(drive, "status error", stat);
+ return 1;
+}
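+
+/*
+ * Typical use (this is exactly what start_request() below does): select the
+ * drive, then wait for READY before issuing anything.  On failure
+ * ide_wait_stat() has already gone through ide_error(), so the caller just
+ * propagates startstop:
+ *
+ *	ide_startstop_t startstop;
+ *	SELECT_DRIVE(hwif, drive);
+ *	if (ide_wait_stat(&startstop, drive, drive->ready_stat,
+ *			  BUSY_STAT|DRQ_STAT, WAIT_READY))
+ *		return startstop;
+ */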
+
+/*
+ * execute_drive_cmd() issues a special drive command,
+ * usually initiated by ioctl() from the external hdparm program.
+ */
+static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq)
+{
+ switch(rq->cmd) {
+ case IDE_DRIVE_TASKFILE:
+ {
+ ide_task_t *args = rq->special;
+
+ if (!(args)) break;
+
+#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG
+ {
+ printk(KERN_INFO "%s: ", drive->name);
+// printk("TF.0=x%02x ", args->tfRegister[IDE_DATA_OFFSET]);
+ printk("TF.1=x%02x ", args->tfRegister[IDE_FEATURE_OFFSET]);
+ printk("TF.2=x%02x ", args->tfRegister[IDE_NSECTOR_OFFSET]);
+ printk("TF.3=x%02x ", args->tfRegister[IDE_SECTOR_OFFSET]);
+ printk("TF.4=x%02x ", args->tfRegister[IDE_LCYL_OFFSET]);
+ printk("TF.5=x%02x ", args->tfRegister[IDE_HCYL_OFFSET]);
+ printk("TF.6=x%02x ", args->tfRegister[IDE_SELECT_OFFSET]);
+ printk("TF.7=x%02x\n", args->tfRegister[IDE_COMMAND_OFFSET]);
+ printk(KERN_INFO "%s: ", drive->name);
+// printk("HTF.0=x%02x ", args->hobRegister[IDE_DATA_OFFSET_HOB]);
+ printk("HTF.1=x%02x ", args->hobRegister[IDE_FEATURE_OFFSET_HOB]);
+ printk("HTF.2=x%02x ", args->hobRegister[IDE_NSECTOR_OFFSET_HOB]);
+ printk("HTF.3=x%02x ", args->hobRegister[IDE_SECTOR_OFFSET_HOB]);
+ printk("HTF.4=x%02x ", args->hobRegister[IDE_LCYL_OFFSET_HOB]);
+ printk("HTF.5=x%02x ", args->hobRegister[IDE_HCYL_OFFSET_HOB]);
+ printk("HTF.6=x%02x ", args->hobRegister[IDE_SELECT_OFFSET_HOB]);
+ printk("HTF.7=x%02x\n", args->hobRegister[IDE_CONTROL_OFFSET_HOB]);
+ }
+#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */
+
+// if (args->tf_out_flags.all == 0) {
+ do_taskfile(drive,
+ (struct hd_drive_task_hdr *)&args->tfRegister,
+ (struct hd_drive_hob_hdr *)&args->hobRegister,
+ args->handler);
+// } else {
+// return flagged_taskfile(drive, args);
+// }
+
+ if (((args->command_type == IDE_DRIVE_TASK_RAW_WRITE) ||
+ (args->command_type == IDE_DRIVE_TASK_OUT)) &&
+ args->prehandler && args->handler)
+ return args->prehandler(drive, rq);
+ return ide_started;
+ }
+ case IDE_DRIVE_TASK:
+ {
+ byte *args = rq->buffer;
+ byte sel;
+
+ if (!(args)) break;
+#ifdef DEBUG
+ printk("%s: DRIVE_TASK_CMD ", drive->name);
+ printk("cmd=0x%02x ", args[0]);
+ printk("fr=0x%02x ", args[1]);
+ printk("ns=0x%02x ", args[2]);
+ printk("sc=0x%02x ", args[3]);
+ printk("lcyl=0x%02x ", args[4]);
+ printk("hcyl=0x%02x ", args[5]);
+ printk("sel=0x%02x\n", args[6]);
+#endif
+ OUT_BYTE(args[1], IDE_FEATURE_REG);
+ OUT_BYTE(args[3], IDE_SECTOR_REG);
+ OUT_BYTE(args[4], IDE_LCYL_REG);
+ OUT_BYTE(args[5], IDE_HCYL_REG);
+ sel = (args[6] & ~0x10);
+ if (drive->select.b.unit)
+ sel |= 0x10;
+ OUT_BYTE(sel, IDE_SELECT_REG);
+ ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
+ return ide_started;
+ }
+ case IDE_DRIVE_CMD:
+ {
+ byte *args = rq->buffer;
+
+ if (!(args)) break;
+#ifdef DEBUG
+ printk("%s: DRIVE_CMD ", drive->name);
+ printk("cmd=0x%02x ", args[0]);
+ printk("sc=0x%02x ", args[1]);
+ printk("fr=0x%02x ", args[2]);
+ printk("xx=0x%02x\n", args[3]);
+#endif
+ if (args[0] == WIN_SMART) {
+ OUT_BYTE(0x4f, IDE_LCYL_REG);
+ OUT_BYTE(0xc2, IDE_HCYL_REG);
+ OUT_BYTE(args[2],IDE_FEATURE_REG);
+ OUT_BYTE(args[1],IDE_SECTOR_REG);
+ ide_cmd(drive, args[0], args[3], &drive_cmd_intr);
+ return ide_started;
+ }
+ OUT_BYTE(args[2],IDE_FEATURE_REG);
+ ide_cmd(drive, args[0], args[1], &drive_cmd_intr);
+ return ide_started;
+ }
+ default:
+ break;
+ }
+ /*
+ * NULL is actually a valid way of waiting for
+ * all current requests to be flushed from the queue.
+ */
+#ifdef DEBUG
+ printk("%s: DRIVE_CMD (null)\n", drive->name);
+#endif
+ ide_end_drive_cmd(drive, GET_STAT(), GET_ERR());
+ return ide_stopped;
+}
+
+/*
+ * start_request() initiates handling of a new I/O request.
+ * (Restructured to reverse the perverted changes anonymously made
+ * back in 2.3.99-pre6.)
+ */
+static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
+{
+ ide_startstop_t startstop;
+ unsigned long block, blockend;
+ unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS;
+ ide_hwif_t *hwif = HWIF(drive);
+
+#ifdef DEBUG
+ printk("%s: start_request: current=0x%08lx\n", hwif->name, (unsigned long) rq);
+#endif
+ /* bail early if we've exceeded max_failures */
+ if (drive->max_failures && (drive->failures > drive->max_failures)) {
+ goto kill_rq;
+ }
+
+ if (unit >= MAX_DRIVES) {
+ printk("%s: bad device number: %s\n", hwif->name, kdevname(rq->rq_dev));
+ goto kill_rq;
+ }
+#ifdef DEBUG
+ if (rq->bh && !buffer_locked(rq->bh)) {
+ printk("%s: block not locked\n", drive->name);
+ goto kill_rq;
+ }
+#endif
+ block = rq->sector;
+ blockend = block + rq->nr_sectors;
+
+
+#ifdef NEVER
+ if ((rq->cmd == READ || rq->cmd == WRITE) &&
+ (drive->media == ide_disk || drive->media == ide_floppy)) {
+ if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) {
+ printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name,
+ (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors);
+ goto kill_rq;
+ }
+ block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0;
+ }
+ /* Yecch - this will shift the entire interval,
+ possibly killing some innocent following sector */
+ if (block == 0 && drive->remap_0_to_1 == 1)
+ block = 1; /* redirect MBR access to EZ-Drive partn table */
+#endif
+
+#ifdef NEVER_DEBUG
+ {
+ printk(" ide::start_request %lx %lx %lx %lx %lx\n",
+ rq->sector, rq->nr_sectors, block,
+ drive->part[minor&PARTN_MASK].start_sect, drive->sect0);
+ }
+#endif
+
+#if (DISK_RECOVERY_TIME > 0)
+ while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME);
+#endif
+
+ SELECT_DRIVE(hwif, drive);
+ if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) {
+ printk("%s: drive not ready for command\n", drive->name);
+ return startstop;
+ }
+	drive->special.all = 0;	/* forcibly cleared, so the do_special() path below is never taken */
+ if (!drive->special.all) {
+ switch(rq->cmd) {
+ case IDE_DRIVE_CMD:
+ case IDE_DRIVE_TASK:
+ case IDE_DRIVE_TASKFILE:
+ return execute_drive_cmd(drive, rq);
+ default:
+ break;
+ }
+ if (drive->driver != NULL) {
+ return (DRIVER(drive)->do_request(drive, rq, block));
+ }
+ printk("%s: media type %d not supported\n", drive->name, drive->media);
+ goto kill_rq;
+ }
+ return do_special(drive);
+kill_rq:
+ if (drive->driver != NULL)
+ DRIVER(drive)->end_request(0, HWGROUP(drive));
+ else
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+}
+
+ide_startstop_t restart_request (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ unsigned long flags;
+ struct request *rq;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ hwgroup->handler = NULL;
+ del_timer(&hwgroup->timer);
+ rq = hwgroup->rq;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ return start_request(drive, rq);
+}
+
+/*
+ * ide_stall_queue() can be used by a drive to give excess bandwidth back
+ * to the hwgroup by sleeping for timeout jiffies.
+ */
+void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
+{
+ if (timeout > WAIT_WORSTCASE)
+ timeout = WAIT_WORSTCASE;
+ drive->sleep = timeout + jiffies;
+}
+
+#define WAKEUP(drive) ((drive)->service_start + 2 * (drive)->service_time)
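+
+/*
+ * Worked example of the heuristic above: a drive whose service of its last
+ * request began at jiffy 1000, and which typically takes 50 jiffies per
+ * request, is presumed due again at WAKEUP(drive) == 1000 + 2*50 == 1100.
+ * choose_drive() below favours the queued drive with the earliest such
+ * estimated wakeup time.
+ */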
+
+/*
+ * choose_drive() selects the next drive which will be serviced.
+ */
+static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup)
+{
+ ide_drive_t *drive, *best;
+
+repeat:
+ best = NULL;
+ drive = hwgroup->drive;
+ do {
+ if (!list_empty(&drive->queue.queue_head) && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) {
+ if (!best
+ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep)))
+ || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive))))
+ {
+ if( !drive->queue.plugged )
+ best = drive;
+ }
+ }
+ } while ((drive = drive->next) != hwgroup->drive);
+ if (best && best->nice1 && !best->sleep && best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) {
+ long t = (signed long)(WAKEUP(best) - jiffies);
+ if (t >= WAIT_MIN_SLEEP) {
+ /*
+ * We *may* have some time to spare, but first let's see if
+ * someone can potentially benefit from our nice mood today..
+ */
+ drive = best->next;
+ do {
+ if (!drive->sleep
+ && 0 < (signed long)(WAKEUP(drive) - (jiffies - best->service_time))
+ && 0 < (signed long)((jiffies + t) - WAKEUP(drive)))
+ {
+ ide_stall_queue(best, IDE_MIN(t, 10 * WAIT_MIN_SLEEP));
+ goto repeat;
+ }
+ } while ((drive = drive->next) != best);
+ }
+ }
+ return best;
+}
+
+/*
+ * Issue a new request to a drive from hwgroup
+ * Caller must have already done spin_lock_irqsave(&io_request_lock, ..);
+ *
+ * A hwgroup is a serialized group of IDE interfaces. Usually there is
+ * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
+ * may have both interfaces in a single hwgroup to "serialize" access.
+ * Or possibly multiple ISA interfaces can share a common IRQ by being grouped
+ * together into one hwgroup for serialized access.
+ *
+ * Note also that several hwgroups can end up sharing a single IRQ,
+ * possibly along with many other devices. This is especially common in
+ * PCI-based systems with off-board IDE controller cards.
+ *
+ * The IDE driver uses the single global io_request_lock spinlock to protect
+ * access to the request queues, and to protect the hwgroup->busy flag.
+ *
+ * The first thread into the driver for a particular hwgroup sets the
+ * hwgroup->busy flag to indicate that this hwgroup is now active,
+ * and then initiates processing of the top request from the request queue.
+ *
+ * Other threads attempting entry notice the busy setting, and will simply
+ * queue their new requests and exit immediately. Note that hwgroup->busy
+ * remains set even when the driver is merely awaiting the next interrupt.
+ * Thus, the meaning is "this hwgroup is busy processing a request".
+ *
+ * When processing of a request completes, the completing thread or IRQ-handler
+ * will start the next request from the queue. If no more work remains,
+ * the driver will clear the hwgroup->busy flag and exit.
+ *
+ * The io_request_lock (spinlock) is used to protect all access to the
+ * hwgroup->busy flag, but is otherwise not needed for most processing in
+ * the driver. This makes the driver much friendlier to shared IRQs
+ * than previous designs, while remaining 100% (?) SMP safe and capable.
+ */
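+/*
+ * Minimal sketch of the busy-flag protocol described above (illustrative
+ * pseudo-code only -- the caller already holds io_request_lock):
+ *
+ *	if (hwgroup->busy)
+ *		return;			(someone else owns the hwgroup)
+ *	hwgroup->busy = 1;		(claim it, start the top request)
+ *	...
+ *	hwgroup->busy = 0;		(cleared only once the queue drains)
+ */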
+/* --BenH: made non-static as ide-pmac.c uses it to kick the hwgroup back
+ * into life on wakeup from machine sleep.
+ */
+void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
+{
+ ide_drive_t *drive;
+ ide_hwif_t *hwif;
+ struct request *rq;
+ ide_startstop_t startstop;
+
+ ide_get_lock(&ide_lock, ide_intr, hwgroup); /* for atari only: POSSIBLY BROKEN HERE(?) */
+
+ __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */
+
+ while (!hwgroup->busy) {
+ hwgroup->busy = 1;
+ drive = choose_drive(hwgroup);
+ if (drive == NULL) {
+ unsigned long sleep = 0;
+ hwgroup->rq = NULL;
+ drive = hwgroup->drive;
+ do {
+ if (drive->sleep && (!sleep || 0 < (signed long)(sleep - drive->sleep)))
+ sleep = drive->sleep;
+ } while ((drive = drive->next) != hwgroup->drive);
+ if (sleep) {
+ /*
+ * Take a short snooze, and then wake up this hwgroup again.
+			 * This gives other hwgroups on the same IRQ a chance to
+ * play fairly with us, just in case there are big differences
+ * in relative throughputs.. don't want to hog the cpu too much.
+ */
+ if (0 < (signed long)(jiffies + WAIT_MIN_SLEEP - sleep))
+ sleep = jiffies + WAIT_MIN_SLEEP;
+#if 1
+ if (timer_pending(&hwgroup->timer))
+ printk("ide_set_handler: timer already active\n");
+#endif
+ hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */
+ mod_timer(&hwgroup->timer, sleep);
+ /* we purposely leave hwgroup->busy==1 while sleeping */
+ } else {
+ /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? */
+ ide_release_lock(&ide_lock); /* for atari only */
+ hwgroup->busy = 0;
+ }
+ return; /* no more work for this hwgroup (for now) */
+ }
+ hwif = HWIF(drive);
+ if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif && hwif->io_ports[IDE_CONTROL_OFFSET]) {
+ /* set nIEN for previous hwif */
+ SELECT_INTERRUPT(hwif, drive);
+ }
+ hwgroup->hwif = hwif;
+ hwgroup->drive = drive;
+ drive->sleep = 0;
+ drive->service_start = jiffies;
+
+ if ( drive->queue.plugged ) /* paranoia */
+ printk("%s: Huh? nuking plugged queue\n", drive->name);
+
+ rq = hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head);
+ /*
+ * Some systems have trouble with IDE IRQs arriving while
+ * the driver is still setting things up. So, here we disable
+ * the IRQ used by this interface while the request is being started.
+ * This may look bad at first, but pretty much the same thing
+ * happens anyway when any interrupt comes in, IDE or otherwise
+ * -- the kernel masks the IRQ while it is being handled.
+ */
+ if (masked_irq && hwif->irq != masked_irq)
+ disable_irq_nosync(hwif->irq);
+ spin_unlock(&io_request_lock);
+ ide__sti(); /* allow other IRQs while we start this request */
+ startstop = start_request(drive, rq);
+ spin_lock_irq(&io_request_lock);
+ if (masked_irq && hwif->irq != masked_irq)
+ enable_irq(hwif->irq);
+ if (startstop == ide_stopped)
+ hwgroup->busy = 0;
+ }
+}
+
+/*
+ * ide_get_queue() returns the queue which corresponds to a given device.
+ */
+request_queue_t *ide_get_queue (kdev_t dev)
+{
+ ide_hwif_t *hwif = (ide_hwif_t *)blk_dev[MAJOR(dev)].data;
+
+ return &hwif->drives[DEVICE_NR(dev) & 1].queue;
+}
+
+/*
+ * Passes the stuff to ide_do_request
+ */
+void do_ide_request(request_queue_t *q)
+{
+ ide_do_request(q->queuedata, 0);
+}
+
+/*
+ * Un-busy the hwgroup etc., and clear any pending DMA status. We want to
+ * retry the current request in PIO mode instead of risking tossing it
+ * all away.
+ */
+void ide_dma_timeout_retry(ide_drive_t *drive)
+{
+ ide_hwif_t *hwif = HWIF(drive);
+ struct request *rq;
+
+ /*
+ * end current dma transaction
+ */
+ (void) hwif->dmaproc(ide_dma_end, drive);
+
+ /*
+ * complain a little, later we might remove some of this verbosity
+ */
+ printk("%s: timeout waiting for DMA\n", drive->name);
+ (void) hwif->dmaproc(ide_dma_timeout, drive);
+
+ /*
+ * disable dma for now, but remember that we did so because of
+ * a timeout -- we'll reenable after we finish this next request
+ * (or rather the first chunk of it) in pio.
+ */
+ drive->retry_pio++;
+ drive->state = DMA_PIO_RETRY;
+ (void) hwif->dmaproc(ide_dma_off_quietly, drive);
+
+ /*
+ * un-busy drive etc (hwgroup->busy is cleared on return) and
+ * make sure request is sane
+ */
+ rq = HWGROUP(drive)->rq;
+ HWGROUP(drive)->rq = NULL;
+
+ rq->errors = 0;
+ rq->sector = rq->bh->b_rsector;
+ rq->current_nr_sectors = rq->bh->b_size >> 9;
+ rq->buffer = rq->bh->b_data;
+}
+
+/*
+ * ide_timer_expiry() is our timeout function for all drive operations.
+ * But note that it can also be invoked as a result of a "sleep" operation
+ * triggered by the mod_timer() call in ide_do_request.
+ */
+void ide_timer_expiry (unsigned long data)
+{
+ ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data;
+ ide_handler_t *handler;
+ ide_expiry_t *expiry;
+ unsigned long flags;
+ unsigned long wait;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ del_timer(&hwgroup->timer);
+
+ if ((handler = hwgroup->handler) == NULL) {
+ /*
+ * Either a marginal timeout occurred
+ * (got the interrupt just as timer expired),
+ * or we were "sleeping" to give other devices a chance.
+ * Either way, we don't really want to complain about anything.
+ */
+ if (hwgroup->sleeping) {
+ hwgroup->sleeping = 0;
+ hwgroup->busy = 0;
+ }
+ } else {
+ ide_drive_t *drive = hwgroup->drive;
+ if (!drive) {
+ printk("ide_timer_expiry: hwgroup->drive was NULL\n");
+ hwgroup->handler = NULL;
+ } else {
+ ide_hwif_t *hwif;
+ ide_startstop_t startstop;
+ if (!hwgroup->busy) {
+ hwgroup->busy = 1; /* paranoia */
+ printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name);
+ }
+ if ((expiry = hwgroup->expiry) != NULL) {
+ /* continue */
+ if ((wait = expiry(drive)) != 0) {
+ /* reset timer */
+ hwgroup->timer.expires = jiffies + wait;
+ add_timer(&hwgroup->timer);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+ }
+ hwgroup->handler = NULL;
+ /*
+ * We need to simulate a real interrupt when invoking
+ * the handler() function, which means we need to globally
+ * mask the specific IRQ:
+ */
+ spin_unlock(&io_request_lock);
+ hwif = HWIF(drive);
+#if DISABLE_IRQ_NOSYNC
+ disable_irq_nosync(hwif->irq);
+#else
+ disable_irq(hwif->irq); /* disable_irq_nosync ?? */
+#endif /* DISABLE_IRQ_NOSYNC */
+ __cli(); /* local CPU only, as if we were handling an interrupt */
+ if (hwgroup->poll_timeout != 0) {
+ startstop = handler(drive);
+ } else if (drive_is_ready(drive)) {
+ if (drive->waiting_for_dma)
+ (void) hwgroup->hwif->dmaproc(ide_dma_lostirq, drive);
+ (void)ide_ack_intr(hwif);
+ printk("%s: lost interrupt\n", drive->name);
+ startstop = handler(drive);
+ } else {
+ if (drive->waiting_for_dma) {
+ startstop = ide_stopped;
+ ide_dma_timeout_retry(drive);
+ } else
+ startstop = ide_error(drive, "irq timeout", GET_STAT());
+ }
+ set_recovery_timer(hwif);
+ drive->service_time = jiffies - drive->service_start;
+ enable_irq(hwif->irq);
+ spin_lock_irq(&io_request_lock);
+ if (startstop == ide_stopped)
+ hwgroup->busy = 0;
+ }
+ }
+ ide_do_request(hwgroup, 0);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * There's nothing really useful we can do with an unexpected interrupt,
+ * other than reading the status register (to clear it), and logging it.
+ * There should be no way that an irq can happen before we're ready for it,
+ * so we needn't worry much about losing an "important" interrupt here.
+ *
+ * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the
+ * drive enters "idle", "standby", or "sleep" mode, so if the status looks
+ * "good", we just ignore the interrupt completely.
+ *
+ * This routine assumes __cli() is in effect when called.
+ *
+ * If an unexpected interrupt happens on irq15 while we are handling irq14
+ * and if the two interfaces are "serialized" (CMD640), then it looks like
+ * we could screw up by interfering with a new request being set up for irq15.
+ *
+ * In reality, this is a non-issue. The new command is not sent unless the
+ * drive is ready to accept one, in which case we know the drive is not
+ * trying to interrupt us. And ide_set_handler() is always invoked before
+ * completing the issuance of any new drive command, so we will not be
+ * accidentally invoked as a result of any valid command completion interrupt.
+ *
+ */
+static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup)
+{
+ byte stat;
+ ide_hwif_t *hwif = hwgroup->hwif;
+
+ /*
+ * handle the unexpected interrupt
+ */
+ do {
+ if (hwif->irq == irq) {
+ stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+ if (!OK_STAT(stat, READY_STAT, BAD_STAT)) {
+ /* Try to not flood the console with msgs */
+ static unsigned long last_msgtime, count;
+ ++count;
+ if (0 < (signed long)(jiffies - (last_msgtime + HZ))) {
+ last_msgtime = jiffies;
+ printk("%s%s: unexpected interrupt, status=0x%02x, count=%ld\n",
+ hwif->name, (hwif->next == hwgroup->hwif) ? "" : "(?)", stat, count);
+ }
+ }
+ }
+ } while ((hwif = hwif->next) != hwgroup->hwif);
+}
+
+/*
+ * entry point for all interrupts, caller does __cli() for us
+ */
+void ide_intr (int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long flags;
+ ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
+ ide_hwif_t *hwif;
+ ide_drive_t *drive;
+ ide_handler_t *handler;
+ ide_startstop_t startstop;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ hwif = hwgroup->hwif;
+
+ if (!ide_ack_intr(hwif)) {
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+
+ if ((handler = hwgroup->handler) == NULL || hwgroup->poll_timeout != 0) {
+ /*
+ * Not expecting an interrupt from this drive.
+ * That means this could be:
+ * (1) an interrupt from another PCI device
+ * sharing the same PCI INT# as us.
+ * or (2) a drive just entered sleep or standby mode,
+ * and is interrupting to let us know.
+ * or (3) a spurious interrupt of unknown origin.
+ *
+ * For PCI, we cannot tell the difference,
+ * so in that case we just ignore it and hope it goes away.
+ */
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ if (IDE_PCI_DEVID_EQ(hwif->pci_devid, IDE_PCI_DEVID_NULL))
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ {
+ /*
+ * Probably not a shared PCI interrupt,
+ * so we can safely try to do something about it:
+ */
+ unexpected_intr(irq, hwgroup);
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ } else {
+ /*
+ * Whack the status register, just in case we have a leftover pending IRQ.
+ */
+ (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+ drive = hwgroup->drive;
+ if (!drive) {
+ /*
+ * This should NEVER happen, and there isn't much we could do about it here.
+ */
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+ if (!drive_is_ready(drive)) {
+ /*
+ * This happens regularly when we share a PCI IRQ with another device.
+ * Unfortunately, it can also happen with some buggy drives that trigger
+ * the IRQ before their status register is up to date. Hopefully we have
+ * enough advance overhead that the latter isn't a problem.
+ */
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+ if (!hwgroup->busy) {
+ hwgroup->busy = 1; /* paranoia */
+ printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name);
+ }
+ hwgroup->handler = NULL;
+ del_timer(&hwgroup->timer);
+ spin_unlock(&io_request_lock);
+
+ if (drive->unmask)
+ ide__sti(); /* local CPU only */
+ startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */
+ spin_lock_irq(&io_request_lock);
+
+ /*
+ * Note that handler() may have set things up for another
+ * interrupt to occur soon, but it cannot happen until
+ * we exit from this routine, because it will be the
+ * same irq as is currently being serviced here, and Linux
+ * won't allow another of the same (on any CPU) until we return.
+ */
+ set_recovery_timer(HWIF(drive));
+ drive->service_time = jiffies - drive->service_start;
+ if (startstop == ide_stopped) {
+ if (hwgroup->handler == NULL) { /* paranoia */
+ hwgroup->busy = 0;
+ ide_do_request(hwgroup, hwif->irq);
+ } else {
+ printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name);
+ }
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * get_info_ptr() returns the (ide_drive_t *) for a given device number.
+ * It returns NULL if the given device number does not match any present drives.
+ */
+ide_drive_t *get_info_ptr (kdev_t i_rdev)
+{
+ int major = MAJOR(i_rdev);
+#if 0
+ int minor = MINOR(i_rdev) & PARTN_MASK;
+#endif
+ unsigned int h;
+
+ for (h = 0; h < MAX_HWIFS; ++h) {
+ ide_hwif_t *hwif = &ide_hwifs[h];
+ if (hwif->present && major == hwif->major) {
+ unsigned unit = DEVICE_NR(i_rdev);
+ if (unit < MAX_DRIVES) {
+ ide_drive_t *drive = &hwif->drives[unit];
+#if 0
+ if ((drive->present) && (drive->part[minor].nr_sects))
+#else
+ if (drive->present)
+#endif
+ return drive;
+ }
+ break;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * This function is intended to be used prior to invoking ide_do_drive_cmd().
+ */
+void ide_init_drive_cmd (struct request *rq)
+{
+ memset(rq, 0, sizeof(*rq));
+ rq->cmd = IDE_DRIVE_CMD;
+}
+
+/*
+ * This function issues a special IDE device request
+ * onto the request queue.
+ *
+ * If action is ide_wait, then the rq is queued at the end of the
+ * request queue, and the function sleeps until it has been processed.
+ * This is for use when invoked from an ioctl handler.
+ *
+ * If action is ide_preempt, then the rq is queued at the head of
+ * the request queue, displacing the currently-being-processed
+ * request and this function returns immediately without waiting
+ * for the new rq to be completed. This is VERY DANGEROUS, and is
+ * intended for careful use by the ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_next, then the rq is queued immediately after
+ * the currently-being-processed-request (if any), and the function
+ * returns without waiting for the new rq to be completed. As above,
+ * This is VERY DANGEROUS, and is intended for careful use by the
+ * ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_end, then the rq is queued at the end of the
+ * request queue, and the function returns immediately without waiting
+ * for the new rq to be completed. This is again intended for careful
+ * use by the ATAPI tape/cdrom driver code.
+ */
+int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action)
+{
+ unsigned long flags;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ unsigned int major = HWIF(drive)->major;
+ struct list_head *queue_head = &drive->queue.queue_head;
+ /*DECLARE_COMPLETION(wait);*/
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (HWIF(drive)->chipset == ide_pdc4030 && rq->buffer != NULL)
+ return -ENOSYS; /* special drive cmds not supported */
+#endif
+ rq->errors = 0;
+ rq->rq_status = RQ_ACTIVE;
+ rq->rq_dev = MKDEV(major,(drive->select.b.unit)<<PARTN_BITS);
+ if (action == ide_wait) {
+ printk("SMH says: wait on IDE device but no queue :-(\n");
+ return 0;
+ }
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (list_empty(queue_head) || action == ide_preempt) {
+ if (action == ide_preempt)
+ hwgroup->rq = NULL;
+ } else {
+ if (action == ide_wait || action == ide_end) {
+ queue_head = queue_head->prev;
+ } else
+ queue_head = queue_head->next;
+ }
+ list_add(&rq->queue, queue_head);
+ ide_do_request(hwgroup, 0);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return 0;
+}
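+
+/*
+ * Illustrative sketch (not part of the driver): queueing a taskfile
+ * command without waiting for it, using the actions described above.
+ * The buffer layout (cmd, nsect, feature, sector count) matches what
+ * ide_wait_cmd() builds below; "cmd" is a hypothetical opcode that
+ * transfers no data. ide_end is used here since ide_wait is stubbed
+ * out in this port.
+ *
+ *	byte args[4] = { cmd, 0, 0, 0 };
+ *	struct request rq;
+ *
+ *	ide_init_drive_cmd(&rq);
+ *	rq.buffer = args;
+ *	(void) ide_do_drive_cmd(drive, &rq, ide_end);
+ */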
+
+/*
+ * This routine is called to flush all partitions and partition tables
+ * for a changed disk, and then re-read the new partition table.
+ * If we are revalidating a disk because of a media change, then we
+ * enter with usage == 0. If we are using an ioctl, we automatically have
+ * usage == 1 (we need an open channel to use an ioctl :-), so this
+ * is our limit.
+ */
+int ide_revalidate_disk (kdev_t i_rdev)
+{
+ ide_drive_t *drive;
+ ide_hwgroup_t *hwgroup;
+ unsigned int p, major, minor;
+ unsigned long flags;
+
+ if ((drive = get_info_ptr(i_rdev)) == NULL)
+ return -ENODEV;
+ major = MAJOR(i_rdev);
+ minor = drive->select.b.unit << PARTN_BITS;
+ hwgroup = HWGROUP(drive);
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (drive->busy || (drive->usage > 1)) {
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return -EBUSY;
+ }
+ drive->busy = 1;
+ MOD_INC_USE_COUNT;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ for (p = 0; p < (1<<PARTN_BITS); ++p) {
+ if (drive->part[p].nr_sects > 0) {
+ kdev_t devp = MKDEV(major, minor+p);
+ invalidate_device(devp, 1);
+ }
+ drive->part[p].start_sect = 0;
+ drive->part[p].nr_sects = 0;
+ }
+
+ if (DRIVER(drive)->revalidate)
+ DRIVER(drive)->revalidate(drive);
+
+ drive->busy = 0;
+ /*wake_up(&drive->wqueue);*/
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+static void revalidate_drives (void)
+{
+ ide_hwif_t *hwif;
+ ide_drive_t *drive;
+ int index, unit;
+
+ for (index = 0; index < MAX_HWIFS; ++index) {
+ hwif = &ide_hwifs[index];
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ drive = &ide_hwifs[index].drives[unit];
+ if (drive->revalidate) {
+ drive->revalidate = 0;
+ if (!initializing)
+ (void) ide_revalidate_disk(MKDEV(hwif->major, unit<<PARTN_BITS));
+ }
+ }
+ }
+}
+
+static void ide_probe_module (void)
+{
+ if (!ide_probe) {
+#if defined(CONFIG_KMOD) && defined(CONFIG_BLK_DEV_IDE_MODULE)
+ (void) request_module("ide-probe-mod");
+#endif /* (CONFIG_KMOD) && (CONFIG_BLK_DEV_IDE_MODULE) */
+ } else {
+ (void) ide_probe->init();
+ }
+ revalidate_drives();
+}
+
+static void ide_driver_module (void)
+{
+ int index;
+ ide_module_t *module = ide_modules;
+
+ for (index = 0; index < MAX_HWIFS; ++index)
+ if (ide_hwifs[index].present)
+ goto search;
+ ide_probe_module();
+search:
+ while (module) {
+ (void) module->init();
+ module = module->next;
+ }
+ revalidate_drives();
+}
+
+static int ide_open (struct inode * inode, struct file * filp)
+{
+ ide_drive_t *drive;
+
+ if ((drive = get_info_ptr(inode->i_rdev)) == NULL)
+ return -ENXIO;
+ if (drive->driver == NULL)
+ ide_driver_module();
+#ifdef CONFIG_KMOD
+ if (drive->driver == NULL) {
+ if (drive->media == ide_disk)
+ (void) request_module("ide-disk");
+ if (drive->media == ide_cdrom)
+ (void) request_module("ide-cd");
+ if (drive->media == ide_tape)
+ (void) request_module("ide-tape");
+ if (drive->media == ide_floppy)
+ (void) request_module("ide-floppy");
+#if defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI)
+ if (drive->media == ide_scsi)
+ (void) request_module("ide-scsi");
+#endif /* defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) */
+ }
+#endif /* CONFIG_KMOD */
+#if 0
+ while (drive->busy)
+ sleep_on(&drive->wqueue);
+#endif
+ drive->usage++;
+ if (drive->driver != NULL)
+ return DRIVER(drive)->open(inode, filp, drive);
+ printk ("%s: driver not present\n", drive->name);
+ drive->usage--;
+ return -ENXIO;
+}
+
+/*
+ * Releasing a block device means we sync() it, so that it can safely
+ * be forgotten about...
+ */
+static int ide_release (struct inode * inode, struct file * file)
+{
+ ide_drive_t *drive;
+
+ if ((drive = get_info_ptr(inode->i_rdev)) != NULL) {
+ drive->usage--;
+ if (drive->driver != NULL)
+ DRIVER(drive)->release(inode, file, drive);
+ }
+ return 0;
+}
+
+int ide_replace_subdriver (ide_drive_t *drive, const char *driver)
+{
+ if (!drive->present || drive->busy || drive->usage)
+ goto abort;
+ if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+ goto abort;
+ strncpy(drive->driver_req, driver, 9);
+ ide_driver_module();
+ drive->driver_req[0] = 0;
+ ide_driver_module();
+ if (DRIVER(drive) && !strcmp(DRIVER(drive)->name, driver))
+ return 0;
+abort:
+ return 1;
+}
+
+#ifdef CONFIG_PROC_FS
+ide_proc_entry_t generic_subdriver_entries[] = {
+ { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
+ { NULL, 0, NULL, NULL }
+};
+#endif
+
+/*
+ * Note that we only release the standard ports,
+ * and do not even try to handle any extra ports
+ * allocated for weird IDE interface chipsets.
+ */
+void hwif_unregister (ide_hwif_t *hwif)
+{
+ if (hwif->straight8) {
+ ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 8);
+ goto jump_eight;
+ }
+ if (hwif->io_ports[IDE_DATA_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 1);
+ if (hwif->io_ports[IDE_ERROR_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_ERROR_OFFSET], 1);
+ if (hwif->io_ports[IDE_NSECTOR_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1);
+ if (hwif->io_ports[IDE_SECTOR_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1);
+ if (hwif->io_ports[IDE_LCYL_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_LCYL_OFFSET], 1);
+ if (hwif->io_ports[IDE_HCYL_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_HCYL_OFFSET], 1);
+ if (hwif->io_ports[IDE_SELECT_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_SELECT_OFFSET], 1);
+ if (hwif->io_ports[IDE_STATUS_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_STATUS_OFFSET], 1);
+jump_eight:
+ if (hwif->io_ports[IDE_CONTROL_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1);
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+ if (hwif->io_ports[IDE_IRQ_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_IRQ_OFFSET], 1);
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+}
+
+void ide_unregister (unsigned int index)
+{
+ struct gendisk *gd;
+ ide_drive_t *drive, *d;
+ ide_hwif_t *hwif, *g;
+ ide_hwgroup_t *hwgroup;
+ int irq_count = 0, unit, i;
+ unsigned long flags;
+ unsigned int p, minor;
+ ide_hwif_t old_hwif;
+
+ if (index >= MAX_HWIFS)
+ return;
+ save_flags(flags); /* all CPUs */
+ cli(); /* all CPUs */
+ hwif = &ide_hwifs[index];
+ if (!hwif->present)
+ goto abort;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ drive = &hwif->drives[unit];
+ if (!drive->present)
+ continue;
+ if (drive->busy || drive->usage)
+ goto abort;
+ if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+ goto abort;
+ }
+ hwif->present = 0;
+
+ /*
+ * All clear? Then blow away the buffer cache
+ */
+ sti();
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ drive = &hwif->drives[unit];
+ if (!drive->present)
+ continue;
+ minor = drive->select.b.unit << PARTN_BITS;
+ for (p = 0; p < (1<<PARTN_BITS); ++p) {
+ if (drive->part[p].nr_sects > 0) {
+ kdev_t devp = MKDEV(hwif->major, minor+p);
+ invalidate_device(devp, 0);
+ }
+ }
+#ifdef CONFIG_PROC_FS
+ destroy_proc_ide_drives(hwif);
+#endif
+ }
+ cli();
+ hwgroup = hwif->hwgroup;
+
+ /*
+ * free the irq if we were the only hwif using it
+ */
+ g = hwgroup->hwif;
+ do {
+ if (g->irq == hwif->irq)
+ ++irq_count;
+ g = g->next;
+ } while (g != hwgroup->hwif);
+ if (irq_count == 1)
+ free_irq(hwif->irq, hwgroup);
+
+ /*
+ * Note that we only release the standard ports,
+ * and do not even try to handle any extra ports
+ * allocated for weird IDE interface chipsets.
+ */
+ hwif_unregister(hwif);
+
+ /*
+ * Remove us from the hwgroup, and free
+ * the hwgroup if we were the only member
+ */
+ d = hwgroup->drive;
+ for (i = 0; i < MAX_DRIVES; ++i) {
+ drive = &hwif->drives[i];
+#ifdef DEVFS_MUST_DIE
+ if (drive->de) {
+ devfs_unregister (drive->de);
+ drive->de = NULL;
+ }
+#endif
+ if (!drive->present)
+ continue;
+ while (hwgroup->drive->next != drive)
+ hwgroup->drive = hwgroup->drive->next;
+ hwgroup->drive->next = drive->next;
+ if (hwgroup->drive == drive)
+ hwgroup->drive = NULL;
+ if (drive->id != NULL) {
+ kfree(drive->id);
+ drive->id = NULL;
+ }
+ drive->present = 0;
+ blk_cleanup_queue(&drive->queue);
+ }
+ if (d->present)
+ hwgroup->drive = d;
+ while (hwgroup->hwif->next != hwif)
+ hwgroup->hwif = hwgroup->hwif->next;
+ hwgroup->hwif->next = hwif->next;
+ if (hwgroup->hwif == hwif)
+ kfree(hwgroup);
+ else
+ hwgroup->hwif = HWIF(hwgroup->drive);
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+ if (hwif->dma_base) {
+ (void) ide_release_dma(hwif);
+ hwif->dma_base = 0;
+ }
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
+ /*
+ * Remove us from the kernel's knowledge
+ */
+ unregister_blkdev(hwif->major, hwif->name);
+ kfree(blksize_size[hwif->major]);
+ kfree(max_sectors[hwif->major]);
+ /*kfree(max_readahead[hwif->major]);*/
+ blk_dev[hwif->major].data = NULL;
+ blk_dev[hwif->major].queue = NULL;
+ blksize_size[hwif->major] = NULL;
+ gd = hwif->gd;
+ if (gd) {
+ del_gendisk(gd);
+ kfree(gd->sizes);
+ kfree(gd->part);
+#ifdef DEVFS_MUST_DIE
+ if (gd->de_arr)
+ kfree (gd->de_arr);
+#endif
+ if (gd->flags)
+ kfree (gd->flags);
+ kfree(gd);
+ hwif->gd = NULL;
+ }
+ old_hwif = *hwif;
+ init_hwif_data (index); /* restore hwif data to pristine status */
+ hwif->hwgroup = old_hwif.hwgroup;
+ hwif->tuneproc = old_hwif.tuneproc;
+ hwif->speedproc = old_hwif.speedproc;
+ hwif->selectproc = old_hwif.selectproc;
+ hwif->resetproc = old_hwif.resetproc;
+ hwif->intrproc = old_hwif.intrproc;
+ hwif->maskproc = old_hwif.maskproc;
+ hwif->quirkproc = old_hwif.quirkproc;
+ hwif->rwproc = old_hwif.rwproc;
+ hwif->ideproc = old_hwif.ideproc;
+ hwif->dmaproc = old_hwif.dmaproc;
+ hwif->busproc = old_hwif.busproc;
+ hwif->bus_state = old_hwif.bus_state;
+ hwif->dma_base = old_hwif.dma_base;
+ hwif->dma_extra = old_hwif.dma_extra;
+ hwif->config_data = old_hwif.config_data;
+ hwif->select_data = old_hwif.select_data;
+ hwif->proc = old_hwif.proc;
+#ifndef CONFIG_BLK_DEV_IDECS
+ hwif->irq = old_hwif.irq;
+#endif /* CONFIG_BLK_DEV_IDECS */
+ hwif->major = old_hwif.major;
+ hwif->chipset = old_hwif.chipset;
+ hwif->autodma = old_hwif.autodma;
+ hwif->udma_four = old_hwif.udma_four;
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ hwif->pci_dev = old_hwif.pci_dev;
+ hwif->pci_devid = old_hwif.pci_devid;
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ hwif->straight8 = old_hwif.straight8;
+ hwif->hwif_data = old_hwif.hwif_data;
+abort:
+ restore_flags(flags); /* all CPUs */
+}
+
+/*
+ * Set up the hw_regs_t structure described by the parameters. You
+ * may set up the hw structure yourself OR use this routine to
+ * do it for you.
+ */
+void ide_setup_ports ( hw_regs_t *hw,
+ ide_ioreg_t base, int *offsets,
+ ide_ioreg_t ctrl, ide_ioreg_t intr,
+ ide_ack_intr_t *ack_intr, int irq)
+{
+ int i;
+
+ for (i = 0; i < IDE_NR_PORTS; i++) {
+ if (offsets[i] == -1) {
+ switch(i) {
+ case IDE_CONTROL_OFFSET:
+ hw->io_ports[i] = ctrl;
+ break;
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+ case IDE_IRQ_OFFSET:
+ hw->io_ports[i] = intr;
+ break;
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+ default:
+ hw->io_ports[i] = 0;
+ break;
+ }
+ } else {
+ hw->io_ports[i] = base + offsets[i];
+ }
+ }
+ hw->irq = irq;
+ hw->dma = NO_DMA;
+ hw->ack_intr = ack_intr;
+}
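+
+/*
+ * Illustrative use (hypothetical addresses and irq): taskfile registers
+ * at base+0..7 and a separately-mapped control register. An offsets[]
+ * entry of -1 selects the ctrl/intr arguments instead of base+offset,
+ * as implemented above.
+ *
+ *	hw_regs_t hw;
+ *	int offsets[IDE_NR_PORTS] = { 0, 1, 2, 3, 4, 5, 6, 7, -1, -1 };
+ *
+ *	ide_setup_ports(&hw, 0x1f0, offsets, 0x3f6, 0, NULL, 14);
+ */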
+
+/*
+ * Register an IDE interface, specifying exactly the registers etc.
+ * During early init (the global "initializing" flag is set) probing
+ * is deferred to the probe module; otherwise it runs immediately.
+ */
+int ide_register_hw (hw_regs_t *hw, ide_hwif_t **hwifp)
+{
+ int index, retry = 1;
+ ide_hwif_t *hwif;
+
+ do {
+ for (index = 0; index < MAX_HWIFS; ++index) {
+ hwif = &ide_hwifs[index];
+ if (hwif->hw.io_ports[IDE_DATA_OFFSET] == hw->io_ports[IDE_DATA_OFFSET])
+ goto found;
+ }
+ for (index = 0; index < MAX_HWIFS; ++index) {
+ hwif = &ide_hwifs[index];
+ if ((!hwif->present && !hwif->mate && !initializing) ||
+ (!hwif->hw.io_ports[IDE_DATA_OFFSET] && initializing))
+ goto found;
+ }
+ for (index = 0; index < MAX_HWIFS; index++)
+ ide_unregister(index);
+ } while (retry--);
+ return -1;
+found:
+ if (hwif->present)
+ ide_unregister(index);
+ if (hwif->present)
+ return -1;
+ memcpy(&hwif->hw, hw, sizeof(*hw));
+ memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
+ hwif->irq = hw->irq;
+ hwif->noprobe = 0;
+ hwif->chipset = hw->chipset;
+
+ if (!initializing) {
+ ide_probe_module();
+#ifdef CONFIG_PROC_FS
+ create_proc_ide_interfaces();
+#endif
+ ide_driver_module();
+ }
+
+ if (hwifp)
+ *hwifp = hwif;
+
+ return (initializing || hwif->present) ? index : -1;
+}
+
+/*
+ * Compatibility function for existing drivers. If you want
+ * something different, use the function above.
+ */
+int ide_register (int arg1, int arg2, int irq)
+{
+ hw_regs_t hw;
+ ide_init_hwif_ports(&hw, (ide_ioreg_t) arg1, (ide_ioreg_t) arg2, NULL);
+ hw.irq = irq;
+ return ide_register_hw(&hw, NULL);
+}
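+
+/*
+ * Typical legacy call (illustrative values): register the primary ISA
+ * interface at the standard base/control ports on IRQ 14.
+ *
+ *	ide_register(0x1f0, 0x3f6, 14);
+ */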
+
+void ide_add_setting (ide_drive_t *drive, const char *name, int rw, int read_ioctl, int write_ioctl, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set)
+{
+ ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL;
+
+ while ((*p) && strcmp((*p)->name, name) < 0)
+ p = &((*p)->next);
+ if ((setting = kmalloc(sizeof(*setting), GFP_KERNEL)) == NULL)
+ goto abort;
+ memset(setting, 0, sizeof(*setting));
+ if ((setting->name = kmalloc(strlen(name) + 1, GFP_KERNEL)) == NULL)
+ goto abort;
+ strcpy(setting->name, name); setting->rw = rw;
+ setting->read_ioctl = read_ioctl; setting->write_ioctl = write_ioctl;
+ setting->data_type = data_type; setting->min = min;
+ setting->max = max; setting->mul_factor = mul_factor;
+ setting->div_factor = div_factor; setting->data = data;
+ setting->set = set; setting->next = *p;
+ if (drive->driver)
+ setting->auto_remove = 1;
+ *p = setting;
+ return;
+abort:
+ if (setting)
+ kfree(setting);
+}
+
+void ide_remove_setting (ide_drive_t *drive, char *name)
+{
+ ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting;
+
+ while ((*p) && strcmp((*p)->name, name))
+ p = &((*p)->next);
+ if ((setting = (*p)) == NULL)
+ return;
+ (*p) = setting->next;
+ kfree(setting->name);
+ kfree(setting);
+}
+
+static ide_settings_t *ide_find_setting_by_ioctl (ide_drive_t *drive, int cmd)
+{
+ ide_settings_t *setting = drive->settings;
+
+ while (setting) {
+ if (setting->read_ioctl == cmd || setting->write_ioctl == cmd)
+ break;
+ setting = setting->next;
+ }
+ return setting;
+}
+
+ide_settings_t *ide_find_setting_by_name (ide_drive_t *drive, char *name)
+{
+ ide_settings_t *setting = drive->settings;
+
+ while (setting) {
+ if (strcmp(setting->name, name) == 0)
+ break;
+ setting = setting->next;
+ }
+ return setting;
+}
+
+static void auto_remove_settings (ide_drive_t *drive)
+{
+ ide_settings_t *setting;
+repeat:
+ setting = drive->settings;
+ while (setting) {
+ if (setting->auto_remove) {
+ ide_remove_setting(drive, setting->name);
+ goto repeat;
+ }
+ setting = setting->next;
+ }
+}
+
+int ide_read_setting (ide_drive_t *drive, ide_settings_t *setting)
+{
+ int val = -EINVAL;
+ unsigned long flags;
+
+ if ((setting->rw & SETTING_READ)) {
+ spin_lock_irqsave(&io_request_lock, flags);
+ switch(setting->data_type) {
+ case TYPE_BYTE:
+ val = *((u8 *) setting->data);
+ break;
+ case TYPE_SHORT:
+ val = *((u16 *) setting->data);
+ break;
+ case TYPE_INT:
+ case TYPE_INTA:
+ val = *((u32 *) setting->data);
+ break;
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ }
+ return val;
+}
+
+int ide_spin_wait_hwgroup (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ unsigned long timeout = jiffies + (3 * HZ);
+
+ spin_lock_irq(&io_request_lock);
+
+ while (hwgroup->busy) {
+ unsigned long lflags;
+ spin_unlock_irq(&io_request_lock);
+ __save_flags(lflags); /* local CPU only */
+ __sti(); /* local CPU only; needed for jiffies */
+ if (0 < (signed long)(jiffies - timeout)) {
+ __restore_flags(lflags); /* local CPU only */
+ printk("%s: channel busy\n", drive->name);
+ return -EBUSY;
+ }
+ __restore_flags(lflags); /* local CPU only */
+ spin_lock_irq(&io_request_lock);
+ }
+ return 0;
+}
+
+/*
+ * FIXME: This should be changed to enqueue a special request
+ * to the driver to change settings, and then wait on a sema for completion.
+ * The current scheme of polling is kludgey, though safe enough.
+ */
+int ide_write_setting (ide_drive_t *drive, ide_settings_t *setting, int val)
+{
+ int i;
+ u32 *p;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (!(setting->rw & SETTING_WRITE))
+ return -EPERM;
+ if (val < setting->min || val > setting->max)
+ return -EINVAL;
+ if (setting->set)
+ return setting->set(drive, val);
+ if (ide_spin_wait_hwgroup(drive))
+ return -EBUSY;
+ switch (setting->data_type) {
+ case TYPE_BYTE:
+ *((u8 *) setting->data) = val;
+ break;
+ case TYPE_SHORT:
+ *((u16 *) setting->data) = val;
+ break;
+ case TYPE_INT:
+ *((u32 *) setting->data) = val;
+ break;
+ case TYPE_INTA:
+ p = (u32 *) setting->data;
+ for (i = 0; i < 1 << PARTN_BITS; i++, p++)
+ *p = val;
+ break;
+ }
+ spin_unlock_irq(&io_request_lock);
+ return 0;
+}
+
+static int set_io_32bit(ide_drive_t *drive, int arg)
+{
+ drive->io_32bit = arg;
+#ifdef CONFIG_BLK_DEV_DTC2278
+ if (HWIF(drive)->chipset == ide_dtc2278)
+ HWIF(drive)->drives[!drive->select.b.unit].io_32bit = arg;
+#endif /* CONFIG_BLK_DEV_DTC2278 */
+ return 0;
+}
+
+static int set_using_dma (ide_drive_t *drive, int arg)
+{
+ if (!drive->driver || !DRIVER(drive)->supports_dma)
+ return -EPERM;
+ if (!drive->id || !(drive->id->capability & 1) || !HWIF(drive)->dmaproc)
+ return -EPERM;
+ if (HWIF(drive)->dmaproc(arg ? ide_dma_on : ide_dma_off, drive))
+ return -EIO;
+ return 0;
+}
+
+static int set_pio_mode (ide_drive_t *drive, int arg)
+{
+ struct request rq;
+
+ if (!HWIF(drive)->tuneproc)
+ return -ENOSYS;
+ if (drive->special.b.set_tune)
+ return -EBUSY;
+ ide_init_drive_cmd(&rq);
+ drive->tune_req = (byte) arg;
+ drive->special.b.set_tune = 1;
+ (void) ide_do_drive_cmd (drive, &rq, ide_wait);
+ return 0;
+}
+
+void ide_add_generic_settings (ide_drive_t *drive)
+{
+/*
+ * drive setting name read/write access read ioctl write ioctl data type min max mul_factor div_factor data pointer set function
+ */
+ ide_add_setting(drive, "io_32bit", drive->no_io_32bit ? SETTING_READ : SETTING_RW, HDIO_GET_32BIT, HDIO_SET_32BIT, TYPE_BYTE, 0, 1 + (SUPPORT_VLB_SYNC << 1), 1, 1, &drive->io_32bit, set_io_32bit);
+ ide_add_setting(drive, "keepsettings", SETTING_RW, HDIO_GET_KEEPSETTINGS, HDIO_SET_KEEPSETTINGS, TYPE_BYTE, 0, 1, 1, 1, &drive->keep_settings, NULL);
+ ide_add_setting(drive, "nice1", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->nice1, NULL);
+ ide_add_setting(drive, "pio_mode", SETTING_WRITE, -1, HDIO_SET_PIO_MODE, TYPE_BYTE, 0, 255, 1, 1, NULL, set_pio_mode);
+ ide_add_setting(drive, "slow", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->slow, NULL);
+ ide_add_setting(drive, "unmaskirq", drive->no_unmask ? SETTING_READ : SETTING_RW, HDIO_GET_UNMASKINTR, HDIO_SET_UNMASKINTR, TYPE_BYTE, 0, 1, 1, 1, &drive->unmask, NULL);
+ ide_add_setting(drive, "using_dma", SETTING_RW, HDIO_GET_DMA, HDIO_SET_DMA, TYPE_BYTE, 0, 1, 1, 1, &drive->using_dma, set_using_dma);
+ ide_add_setting(drive, "ide_scsi", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->scsi, NULL);
+ ide_add_setting(drive, "init_speed", SETTING_RW, -1, -1, TYPE_BYTE, 0, 69, 1, 1, &drive->init_speed, NULL);
+ ide_add_setting(drive, "current_speed", SETTING_RW, -1, -1, TYPE_BYTE, 0, 69, 1, 1, &drive->current_speed, NULL);
+ ide_add_setting(drive, "number", SETTING_RW, -1, -1, TYPE_BYTE, 0, 3, 1, 1, &drive->dn, NULL);
+}
+
+int ide_wait_cmd (ide_drive_t *drive, int cmd, int nsect, int feature, int sectors, byte *buf)
+{
+ struct request rq;
+ byte buffer[4];
+
+ if (!buf)
+ buf = buffer;
+ memset(buf, 0, 4 + SECTOR_WORDS * 4 * sectors);
+ ide_init_drive_cmd(&rq);
+ rq.buffer = buf;
+ *buf++ = cmd;
+ *buf++ = nsect;
+ *buf++ = feature;
+ *buf++ = sectors;
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+int ide_wait_cmd_task (ide_drive_t *drive, byte *buf)
+{
+ struct request rq;
+
+ ide_init_drive_cmd(&rq);
+ rq.cmd = IDE_DRIVE_TASK;
+ rq.buffer = buf;
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+/*
+ * Delay for *at least* 50ms. As we don't know how much time is left
+ * until the next tick occurs, we wait an extra tick to be safe.
+ * This is used only during the probing/polling for drives at boot time.
+ *
+ * However, it may prove useful in other places as well, so we export it now.
+ * The future may change this to a delay settable in milliseconds.
+ */
+void ide_delay_50ms (void)
+{
+#ifndef CONFIG_BLK_DEV_IDECS
+ mdelay(50);
+#else
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ/20);
+#endif /* CONFIG_BLK_DEV_IDECS */
+}
+
+int system_bus_clock (void)
+{
+ return (int) (system_bus_speed ? system_bus_speed : ide_system_bus_speed());
+}
+
+int ide_reinit_drive (ide_drive_t *drive)
+{
+ switch (drive->media) {
+#ifdef CONFIG_BLK_DEV_IDECD
+ case ide_cdrom:
+ {
+ extern int ide_cdrom_reinit(ide_drive_t *drive);
+ if (ide_cdrom_reinit(drive))
+ return 1;
+ break;
+ }
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+ case ide_disk:
+ {
+ extern int idedisk_reinit(ide_drive_t *drive);
+ if (idedisk_reinit(drive))
+ return 1;
+ break;
+ }
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+ case ide_floppy:
+ {
+ extern int idefloppy_reinit(ide_drive_t *drive);
+ if (idefloppy_reinit(drive))
+ return 1;
+ break;
+ }
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+ case ide_tape:
+ {
+ extern int idetape_reinit(ide_drive_t *drive);
+ if (idetape_reinit(drive))
+ return 1;
+ break;
+ }
+#endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDESCSI
+/*
+ * {
+ * extern int idescsi_reinit(ide_drive_t *drive);
+ * if (idescsi_reinit(drive))
+ * return 1;
+ * break;
+ * }
+ */
+#endif /* CONFIG_BLK_DEV_IDESCSI */
+ default:
+ return 1;
+ }
+ return 0;
+}
+
+static int ide_ioctl (struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ int err = 0, major, minor;
+ ide_drive_t *drive;
+ struct request rq;
+ kdev_t dev;
+ ide_settings_t *setting;
+
+ if (!inode || !(dev = inode->i_rdev))
+ return -EINVAL;
+ major = MAJOR(dev); minor = MINOR(dev);
+ if ((drive = get_info_ptr(inode->i_rdev)) == NULL)
+ return -ENODEV;
+
+ if ((setting = ide_find_setting_by_ioctl(drive, cmd)) != NULL) {
+ if (cmd == setting->read_ioctl) {
+ err = ide_read_setting(drive, setting);
+ return err >= 0 ? put_user(err, (long *) arg) : err;
+ } else {
+ if ((MINOR(inode->i_rdev) & PARTN_MASK))
+ return -EINVAL;
+ return ide_write_setting(drive, setting, arg);
+ }
+ }
+
+ ide_init_drive_cmd (&rq);
+ switch (cmd) {
+ case HDIO_GETGEO:
+ {
+ struct hd_geometry *loc = (struct hd_geometry *) arg;
+ unsigned short bios_cyl = drive->bios_cyl; /* truncate */
+ if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
+ if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT;
+ if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT;
+ if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT;
+ if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+ (unsigned long *) &loc->start)) return -EFAULT;
+ return 0;
+ }
+
+ case HDIO_GETGEO_BIG:
+ {
+ struct hd_big_geometry *loc = (struct hd_big_geometry *) arg;
+ if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
+ if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT;
+ if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT;
+ if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT;
+ if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+ (unsigned long *) &loc->start)) return -EFAULT;
+ return 0;
+ }
+
+ case HDIO_GETGEO_BIG_RAW:
+ {
+ struct hd_big_geometry *loc = (struct hd_big_geometry *) arg;
+ if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
+ if (put_user(drive->head, (byte *) &loc->heads)) return -EFAULT;
+ if (put_user(drive->sect, (byte *) &loc->sectors)) return -EFAULT;
+ if (put_user(drive->cyl, (unsigned int *) &loc->cylinders)) return -EFAULT;
+ if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+ (unsigned long *) &loc->start)) return -EFAULT;
+ return 0;
+ }
+
+#if 0
+ case BLKGETSIZE: /* Return device size */
+ return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (unsigned long *) arg);
+ case BLKGETSIZE64:
+ return put_user((u64)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects << 9, (u64 *) arg);
+
+ case BLKRRPART: /* Re-read partition tables */
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ return ide_revalidate_disk(inode->i_rdev);
+#endif
+
+ case HDIO_OBSOLETE_IDENTITY:
+ case HDIO_GET_IDENTITY:
+ if (MINOR(inode->i_rdev) & PARTN_MASK)
+ return -EINVAL;
+ if (drive->id == NULL)
+ return -ENOMSG;
+ if (copy_to_user((char *)arg, (char *)drive->id, (cmd == HDIO_GET_IDENTITY) ? sizeof(*drive->id) : 142))
+ return -EFAULT;
+ return 0;
+
+ case HDIO_GET_NICE:
+ return put_user(drive->dsc_overlap << IDE_NICE_DSC_OVERLAP |
+ drive->atapi_overlap << IDE_NICE_ATAPI_OVERLAP |
+ drive->nice0 << IDE_NICE_0 |
+ drive->nice1 << IDE_NICE_1 |
+ drive->nice2 << IDE_NICE_2,
+ (long *) arg);
+
+#ifdef CONFIG_IDE_TASK_IOCTL
+ case HDIO_DRIVE_TASKFILE:
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+ return -EACCES;
+ switch(drive->media) {
+ case ide_disk:
+ return ide_taskfile_ioctl(drive, inode, file, cmd, arg);
+#ifdef CONFIG_PKT_TASK_IOCTL
+ case ide_cdrom:
+ case ide_tape:
+ case ide_floppy:
+ return pkt_taskfile_ioctl(drive, inode, file, cmd, arg);
+#endif /* CONFIG_PKT_TASK_IOCTL */
+ default:
+ return -ENOMSG;
+ }
+#endif /* CONFIG_IDE_TASK_IOCTL */
+
+ case HDIO_DRIVE_CMD:
+ {
+ byte args[4], *argbuf = args;
+ byte xfer_rate = 0;
+ int argsize = 4;
+ ide_task_t tfargs;
+
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+ return -EACCES;
+ if (NULL == (void *) arg)
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+ if (copy_from_user(args, (void *)arg, 4))
+ return -EFAULT;
+
+ tfargs.tfRegister[IDE_FEATURE_OFFSET] = args[2];
+ tfargs.tfRegister[IDE_NSECTOR_OFFSET] = args[3];
+ tfargs.tfRegister[IDE_SECTOR_OFFSET] = args[1];
+ tfargs.tfRegister[IDE_LCYL_OFFSET] = 0x00;
+ tfargs.tfRegister[IDE_HCYL_OFFSET] = 0x00;
+ tfargs.tfRegister[IDE_SELECT_OFFSET] = 0x00;
+ tfargs.tfRegister[IDE_COMMAND_OFFSET] = args[0];
+
+ if (args[3]) {
+ argsize = 4 + (SECTOR_WORDS * 4 * args[3]);
+ argbuf = kmalloc(argsize, GFP_KERNEL);
+ if (argbuf == NULL)
+ return -ENOMEM;
+ memcpy(argbuf, args, 4);
+ }
+
+ if (set_transfer(drive, &tfargs)) {
+ xfer_rate = args[1];
+ if (ide_ata66_check(drive, &tfargs))
+ goto abort;
+ }
+
+ err = ide_wait_cmd(drive, args[0], args[1], args[2], args[3], argbuf);
+
+ if (!err && xfer_rate) {
+ /* active-retuning-calls future */
+ if ((HWIF(drive)->speedproc) != NULL)
+ HWIF(drive)->speedproc(drive, xfer_rate);
+ ide_driveid_update(drive);
+ }
+ abort:
+ if (copy_to_user((void *)arg, argbuf, argsize))
+ err = -EFAULT;
+ if (argsize > 4)
+ kfree(argbuf);
+ return err;
+ }
+ case HDIO_DRIVE_TASK:
+ {
+ byte args[7], *argbuf = args;
+ int argsize = 7;
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES;
+ if (copy_from_user(args, (void *)arg, 7))
+ return -EFAULT;
+ err = ide_wait_cmd_task(drive, argbuf);
+ if (copy_to_user((void *)arg, argbuf, argsize))
+ err = -EFAULT;
+ return err;
+ }
+ case HDIO_SCAN_HWIF:
+ {
+ int args[3];
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ if (copy_from_user(args, (void *)arg, 3 * sizeof(int)))
+ return -EFAULT;
+ if (ide_register(args[0], args[1], args[2]) == -1)
+ return -EIO;
+ return 0;
+ }
+ case HDIO_UNREGISTER_HWIF:
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ /* (arg > MAX_HWIFS) checked in function */
+ ide_unregister(arg);
+ return 0;
+ case HDIO_SET_NICE:
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ if (drive->driver == NULL)
+ return -EPERM;
+ if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1))))
+ return -EPERM;
+ drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1;
+ if (drive->dsc_overlap && !DRIVER(drive)->supports_dsc_overlap) {
+ drive->dsc_overlap = 0;
+ return -EPERM;
+ }
+ drive->nice1 = (arg >> IDE_NICE_1) & 1;
+ return 0;
+ case HDIO_DRIVE_RESET:
+ {
+ unsigned long flags;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+#if 1
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (hwgroup->handler != NULL) {
+ printk("%s: ide_set_handler: handler not null; %p\n", drive->name, hwgroup->handler);
+ (void) hwgroup->handler(drive);
+// hwgroup->handler = NULL;
+// hwgroup->expiry = NULL;
+ hwgroup->timer.expires = jiffies;
+ del_timer(&hwgroup->timer);
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+#endif
+ (void) ide_do_reset(drive);
+ if (drive->suspend_reset) {
+/*
+ * APM WAKE UP todo !!
+ * int nogoodpower = 1;
+ * while(nogoodpower) {
+ * check_power1() or check_power2()
+ * nogoodpower = 0;
+ * }
+ * HWIF(drive)->multiproc(drive);
+ */
+ return ide_revalidate_disk(inode->i_rdev);
+ }
+ return 0;
+ }
+#if 0
+ case BLKROSET:
+ case BLKROGET:
+ case BLKFLSBUF:
+ case BLKSSZGET:
+ case BLKPG:
+ case BLKELVGET:
+ case BLKELVSET:
+ case BLKBSZGET:
+ case BLKBSZSET:
+ return blk_ioctl(inode->i_rdev, cmd, arg);
+#endif
+
+ case HDIO_GET_BUSSTATE:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (put_user(HWIF(drive)->bus_state, (long *)arg))
+ return -EFAULT;
+ return 0;
+
+ case HDIO_SET_BUSSTATE:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (HWIF(drive)->busproc)
+ HWIF(drive)->busproc(drive, (int)arg);
+ return 0;
+
+ default:
+ if (drive->driver != NULL)
+ return DRIVER(drive)->ioctl(drive, inode, file, cmd, arg);
+ return -EPERM;
+ }
+}
+
+static int ide_check_media_change (kdev_t i_rdev)
+{
+ ide_drive_t *drive;
+
+ if ((drive = get_info_ptr(i_rdev)) == NULL)
+ return -ENODEV;
+ if (drive->driver != NULL)
+ return DRIVER(drive)->media_change(drive);
+ return 0;
+}
+
+void ide_fixstring (byte *s, const int bytecount, const int byteswap)
+{
+ byte *p = s, *end = &s[bytecount & ~1]; /* bytecount must be even */
+
+ if (byteswap) {
+ /* convert from big-endian to host byte order */
+ for (p = end ; p != s;) {
+ unsigned short *pp = (unsigned short *) (p -= 2);
+ *pp = ntohs(*pp);
+ }
+ }
+
+ /* strip leading blanks */
+ while (s != end && *s == ' ')
+ ++s;
+
+ /* compress internal blanks and strip trailing blanks */
+ while (s != end && *s) {
+ if (*s++ != ' ' || (s != end && *s && *s != ' '))
+ *p++ = *(s-1);
+ }
+
+ /* wipe out trailing garbage */
+ while (p != end)
+ *p++ = '\0';
+}
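+
+/*
+ * Worked example (hypothetical bytes): with byteswap set, each byte
+ * pair is swapped first, so raw identify bytes "DW C" become "WDC ".
+ * The passes above then turn "  WDC   AC2850F " into "WDC AC2850F",
+ * NUL-padded out to bytecount.
+ */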
+
+/*
+ * stridx() returns the offset of c within s,
+ * or -1 if c is '\0' or not found within s.
+ */
+static int __init stridx (const char *s, char c)
+{
+ char *i = strchr(s, c);
+ return (i && c) ? i - s : -1;
+}
+
+/*
+ * match_parm() does parsing for ide_setup():
+ *
+ * 1. the first char of s must be '='.
+ * 2. if the remainder matches one of the supplied keywords,
+ * the index (1 based) of the keyword is negated and returned.
+ * 3. if the remainder is a series of no more than max_vals numbers
+ * separated by commas, the numbers are saved in vals[] and a
+ * count of how many were saved is returned. Base10 is assumed,
+ * and base16 is allowed when prefixed with "0x".
+ * 4. otherwise, zero is returned.
+ */
+static int __init match_parm (char *s, const char *keywords[], int vals[], int max_vals)
+{
+ static const char *decimal = "0123456789";
+ static const char *hex = "0123456789abcdef";
+ int i, n;
+
+ if (*s++ == '=') {
+ /*
+ * Try matching against the supplied keywords,
+ * and return -(index+1) if we match one
+ */
+ if (keywords != NULL) {
+ for (i = 0; *keywords != NULL; ++i) {
+ if (!strcmp(s, *keywords++))
+ return -(i+1);
+ }
+ }
+ /*
+ * Look for a series of no more than "max_vals"
+ * numeric values separated by commas, in base10,
+ * or base16 when prefixed with "0x".
+ * Return a count of how many were found.
+ */
+ for (n = 0; (i = stridx(decimal, *s)) >= 0;) {
+ vals[n] = i;
+ while ((i = stridx(decimal, *++s)) >= 0)
+ vals[n] = (vals[n] * 10) + i;
+ if (*s == 'x' && !vals[n]) {
+ while ((i = stridx(hex, *++s)) >= 0)
+ vals[n] = (vals[n] * 0x10) + i;
+ }
+ if (++n == max_vals)
+ break;
+ if (*s == ',' || *s == ';')
+ ++s;
+ }
+ if (!*s)
+ return n;
+ }
+ return 0; /* zero = nothing matched */
+}
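+
+/*
+ * Worked examples (illustrative), with keywords = { "serialize", NULL }:
+ *
+ *	match_parm("=serialize", keywords, vals, 3)	returns -1
+ *	match_parm("=0x1f0,0x3f6,14", NULL, vals, 3)	returns 3,
+ *		with vals[] = { 0x1f0, 0x3f6, 14 }
+ *	match_parm("=bogus", NULL, vals, 3)		returns 0
+ */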
+
+/*
+ * ide_setup() gets called VERY EARLY during initialization,
+ * to handle kernel "command line" strings beginning with "hdx="
+ * or "ide". Here is the complete set currently supported:
+ *
+ * "hdx=" is recognized for all "x" from "a" to "h", such as "hdc".
+ * "idex=" is recognized for all "x" from "0" to "3", such as "ide1".
+ *
+ * "hdx=noprobe" : drive may be present, but do not probe for it
+ * "hdx=none" : drive is NOT present, ignore cmos and do not probe
+ * "hdx=nowerr" : ignore the WRERR_STAT bit on this drive
+ * "hdx=cdrom" : drive is present, and is a cdrom drive
+ * "hdx=cyl,head,sect" : disk drive is present, with specified geometry
+ * "hdx=noremap" : do not remap 0->1 even though EZD was detected
+ * "hdx=autotune" : driver will attempt to tune interface speed
+ * to the fastest PIO mode supported,
+ * if possible for this drive only.
+ * Not fully supported by all chipset types,
+ * and quite likely to cause trouble with
+ * older/odd IDE drives.
+ *
+ * "hdx=slow" : insert a huge pause after each access to the data
+ * port. Should be used only as a last resort.
+ *
+ * "hdx=swapdata" : when the drive is a disk, byte swap all data
+ * "hdx=bswap" : same as above..........
+ * "hdxlun=xx" : set the drive last logical unit.
+ * "hdx=flash" : allows for more than one ata_flash disk to be
+ * registered. In most cases, only one device
+ * will be present.
+ * "hdx=scsi" : the return of the ide-scsi flag, this is useful for
+ * allowwing ide-floppy, ide-tape, and ide-cdrom|writers
+ * to use ide-scsi emulation on a device specific option.
+ * "idebus=xx" : inform IDE driver of VESA/PCI bus speed in MHz,
+ * where "xx" is between 20 and 66 inclusive,
+ * used when tuning chipset PIO modes.
+ * For PCI bus, 25 is correct for a P75 system,
+ * 30 is correct for P90,P120,P180 systems,
+ * and 33 is used for P100,P133,P166 systems.
+ * If in doubt, use idebus=33 for PCI.
+ * As for VLB, it is safest to not specify it.
+ *
+ * "idex=noprobe" : do not attempt to access/use this interface
+ * "idex=base" : probe for an interface at the addr specified,
+ * where "base" is usually 0x1f0 or 0x170
+ * and "ctl" is assumed to be "base"+0x206
+ * "idex=base,ctl" : specify both base and ctl
+ * "idex=base,ctl,irq" : specify base, ctl, and irq number
+ * "idex=autotune" : driver will attempt to tune interface speed
+ * to the fastest PIO mode supported,
+ * for all drives on this interface.
+ * Not fully supported by all chipset types,
+ * and quite likely to cause trouble with
+ * older/odd IDE drives.
+ * "idex=noautotune" : driver will NOT attempt to tune interface speed
+ * This is the default for most chipsets,
+ * except the cmd640.
+ * "idex=serialize" : do not overlap operations on idex and ide(x^1)
+ * "idex=four" : four drives on idex and ide(x^1) share same ports
+ * "idex=reset" : reset interface before first use
+ * "idex=dma" : enable DMA by default on both drives if possible
+ * "idex=ata66" : informs the interface that it has an 80c cable
+ * for chipsets that are ATA-66 capable, but
+ * the ability to bit test for detection is
+ * currently unknown.
+ * "ide=reverse" : Formerly called to pci sub-system, but now local.
+ *
+ * The following are valid ONLY on ide0, (except dc4030)
+ * and the defaults for the base,ctl ports must not be altered.
+ *
+ * "ide0=dtc2278" : probe/support DTC2278 interface
+ * "ide0=ht6560b" : probe/support HT6560B interface
+ * "ide0=cmd640_vlb" : *REQUIRED* for VLB cards with the CMD640 chip
+ * (not for PCI -- automatically detected)
+ * "ide0=qd65xx" : probe/support qd65xx interface
+ * "ide0=ali14xx" : probe/support ali14xx chipsets (ALI M1439, M1443, M1445)
+ * "ide0=umc8672" : probe/support umc8672 chipsets
+ * "idex=dc4030" : probe/support Promise DC4030VL interface
+ * "ide=doubler" : probe/support IDE doublers on Amiga
+ */
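+/*
+ * Example (illustrative): a boot line combining the options above,
+ *
+ *	ide0=0x1f0,0x3f6,14 hdb=noprobe hdc=cdrom idebus=33
+ *
+ * probes ide0 at the given ports/irq, skips probing hdb, declares hdc
+ * to be a cdrom, and assumes a 33MHz bus when tuning PIO modes.
+ */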
+int __init ide_setup (char *s)
+{
+ int i, vals[3];
+ ide_hwif_t *hwif;
+ ide_drive_t *drive;
+ unsigned int hw, unit;
+ const char max_drive = 'a' + ((MAX_HWIFS * MAX_DRIVES) - 1);
+ const char max_hwif = '0' + (MAX_HWIFS - 1);
+
+ if (strncmp(s,"hd",2) == 0 && s[2] == '=') /* hd= is for hd.c */
+ return 0; /* driver and not us */
+
+ if (strncmp(s,"ide",3) &&
+ strncmp(s,"idebus",6) &&
+ strncmp(s,"hd",2)) /* hdx= & hdxlun= */
+ return 0;
+
+ printk("ide_setup: %s", s);
+ init_ide_data ();
+
+#ifdef CONFIG_BLK_DEV_IDEDOUBLER
+ if (!strcmp(s, "ide=doubler")) {
+ extern int ide_doubler;
+
+ printk(" : Enabled support for IDE doublers\n");
+ ide_doubler = 1;
+ return 1;
+ }
+#endif /* CONFIG_BLK_DEV_IDEDOUBLER */
+
+ if (!strcmp(s, "ide=nodma")) {
+ printk("IDE: Prevented DMA\n");
+ noautodma = 1;
+ return 1;
+ }
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ if (!strcmp(s, "ide=reverse")) {
+ ide_scan_direction = 1;
+ printk(" : Enabled support for IDE inverse scan order.\n");
+ return 1;
+ }
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+
+ /*
+ * Look for drive options: "hdx="
+ */
+ if (s[0] == 'h' && s[1] == 'd' && s[2] >= 'a' && s[2] <= max_drive) {
+ const char *hd_words[] = {"none", "noprobe", "nowerr", "cdrom",
+ "serialize", "autotune", "noautotune",
+ "slow", "swapdata", "bswap", "flash",
+ "remap", "noremap", "scsi", NULL};
+ unit = s[2] - 'a';
+ hw = unit / MAX_DRIVES;
+ unit = unit % MAX_DRIVES;
+ hwif = &ide_hwifs[hw];
+ drive = &hwif->drives[unit];
+ if (strncmp(s + 4, "ide-", 4) == 0) {
+ strncpy(drive->driver_req, s + 4, 9);
+ goto done;
+ }
+ /*
+ * Look for last lun option: "hdxlun="
+ */
+ if (s[3] == 'l' && s[4] == 'u' && s[5] == 'n') {
+ if (match_parm(&s[6], NULL, vals, 1) != 1)
+ goto bad_option;
+ if (vals[0] >= 0 && vals[0] <= 7) {
+ drive->last_lun = vals[0];
+ drive->forced_lun = 1;
+ } else
+ printk(" -- BAD LAST LUN! Expected value from 0 to 7");
+ goto done;
+ }
+ switch (match_parm(&s[3], hd_words, vals, 3)) {
+ case -1: /* "none" */
+ drive->nobios = 1; /* drop into "noprobe" */
+ case -2: /* "noprobe" */
+ drive->noprobe = 1;
+ goto done;
+ case -3: /* "nowerr" */
+ drive->bad_wstat = BAD_R_STAT;
+ hwif->noprobe = 0;
+ goto done;
+ case -4: /* "cdrom" */
+ drive->present = 1;
+ drive->media = ide_cdrom;
+ hwif->noprobe = 0;
+ goto done;
+ case -5: /* "serialize" */
+ printk(" -- USE \"ide%d=serialize\" INSTEAD", hw);
+ goto do_serialize;
+ case -6: /* "autotune" */
+ drive->autotune = 1;
+ goto done;
+ case -7: /* "noautotune" */
+ drive->autotune = 2;
+ goto done;
+ case -8: /* "slow" */
+ drive->slow = 1;
+ goto done;
+ case -9: /* "swapdata" or "bswap" */
+ case -10:
+ drive->bswap = 1;
+ goto done;
+ case -11: /* "flash" */
+ drive->ata_flash = 1;
+ goto done;
+ case -12: /* "remap" */
+ drive->remap_0_to_1 = 1;
+ goto done;
+ case -13: /* "noremap" */
+ drive->remap_0_to_1 = 2;
+ goto done;
+ case -14: /* "scsi" */
+#if defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI)
+ drive->scsi = 1;
+ goto done;
+#else
+ drive->scsi = 0;
+ goto bad_option;
+#endif /* defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) */
+ case 3: /* cyl,head,sect */
+ drive->media = ide_disk;
+ drive->cyl = drive->bios_cyl = vals[0];
+ drive->head = drive->bios_head = vals[1];
+ drive->sect = drive->bios_sect = vals[2];
+ drive->present = 1;
+ drive->forced_geom = 1;
+ hwif->noprobe = 0;
+ goto done;
+ default:
+ goto bad_option;
+ }
+ }
+
+ if (s[0] != 'i' || s[1] != 'd' || s[2] != 'e')
+ goto bad_option;
+ /*
+ * Look for bus speed option: "idebus="
+ */
+ if (s[3] == 'b' && s[4] == 'u' && s[5] == 's') {
+ if (match_parm(&s[6], NULL, vals, 1) != 1)
+ goto bad_option;
+ if (vals[0] >= 20 && vals[0] <= 66) {
+ idebus_parameter = vals[0];
+ } else
+ printk(" -- BAD BUS SPEED! Expected value from 20 to 66");
+ goto done;
+ }
+ /*
+ * Look for interface options: "idex="
+ */
+ if (s[3] >= '0' && s[3] <= max_hwif) {
+ /*
+ * Be VERY CAREFUL changing this: note the hardcoded indexes below
+ * -8,-9,-10 : reserved for future "idex=" keywords, to ease the hardcoding.
+ */
+ const char *ide_words[] = {
+ "noprobe", "serialize", "autotune", "noautotune", "reset", "dma", "ata66",
+ "minus8", "minus9", "minus10",
+ "four", "qd65xx", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", NULL };
+ hw = s[3] - '0';
+ hwif = &ide_hwifs[hw];
+ i = match_parm(&s[4], ide_words, vals, 3);
+
+ /*
+ * Cryptic check to ensure chipset not already set for hwif:
+ */
+ if (i > 0 || i <= -11) { /* is parameter a chipset name? */
+ if (hwif->chipset != ide_unknown)
+ goto bad_option; /* chipset already specified */
+ if (i <= -11 && i != -18 && hw != 0)
+ goto bad_hwif; /* chipset drivers are for "ide0=" only */
+ if (i <= -11 && i != -18 && ide_hwifs[hw+1].chipset != ide_unknown)
+ goto bad_option; /* chipset for 2nd port already specified */
+ printk("\n");
+ }
+
+ switch (i) {
+#ifdef CONFIG_BLK_DEV_PDC4030
+ case -18: /* "dc4030" */
+ {
+ extern void init_pdc4030(void);
+ init_pdc4030();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+#ifdef CONFIG_BLK_DEV_ALI14XX
+ case -17: /* "ali14xx" */
+ {
+ extern void init_ali14xx (void);
+ init_ali14xx();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_ALI14XX */
+#ifdef CONFIG_BLK_DEV_UMC8672
+ case -16: /* "umc8672" */
+ {
+ extern void init_umc8672 (void);
+ init_umc8672();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_UMC8672 */
+#ifdef CONFIG_BLK_DEV_DTC2278
+ case -15: /* "dtc2278" */
+ {
+ extern void init_dtc2278 (void);
+ init_dtc2278();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_DTC2278 */
+#ifdef CONFIG_BLK_DEV_CMD640
+ case -14: /* "cmd640_vlb" */
+ {
+ extern int cmd640_vlb; /* flag for cmd640.c */
+ cmd640_vlb = 1;
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_CMD640 */
+#ifdef CONFIG_BLK_DEV_HT6560B
+ case -13: /* "ht6560b" */
+ {
+ extern void init_ht6560b (void);
+ init_ht6560b();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_HT6560B */
+#ifdef CONFIG_BLK_DEV_QD65XX
+ case -12: /* "qd65xx" */
+ {
+ extern void init_qd65xx (void);
+ init_qd65xx();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_QD65XX */
+#ifdef CONFIG_BLK_DEV_4DRIVES
+ case -11: /* "four" drives on one set of ports */
+ {
+ ide_hwif_t *mate = &ide_hwifs[hw^1];
+ mate->drives[0].select.all ^= 0x20;
+ mate->drives[1].select.all ^= 0x20;
+ hwif->chipset = mate->chipset = ide_4drives;
+ mate->irq = hwif->irq;
+ memcpy(mate->io_ports, hwif->io_ports, sizeof(hwif->io_ports));
+ goto do_serialize;
+ }
+#endif /* CONFIG_BLK_DEV_4DRIVES */
+ case -10: /* minus10 */
+ case -9: /* minus9 */
+ case -8: /* minus8 */
+ goto bad_option;
+ case -7: /* ata66 */
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ hwif->udma_four = 1;
+ goto done;
+#else /* !CONFIG_BLK_DEV_IDEPCI */
+ hwif->udma_four = 0;
+ goto bad_hwif;
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ case -6: /* dma */
+ hwif->autodma = 1;
+ goto done;
+ case -5: /* "reset" */
+ hwif->reset = 1;
+ goto done;
+ case -4: /* "noautotune" */
+ hwif->drives[0].autotune = 2;
+ hwif->drives[1].autotune = 2;
+ goto done;
+ case -3: /* "autotune" */
+ hwif->drives[0].autotune = 1;
+ hwif->drives[1].autotune = 1;
+ goto done;
+ case -2: /* "serialize" */
+ do_serialize:
+ hwif->mate = &ide_hwifs[hw^1];
+ hwif->mate->mate = hwif;
+ hwif->serialized = hwif->mate->serialized = 1;
+ goto done;
+
+ case -1: /* "noprobe" */
+ hwif->noprobe = 1;
+ goto done;
+
+ case 1: /* base */
+ vals[1] = vals[0] + 0x206; /* default ctl */
+ case 2: /* base,ctl */
+ vals[2] = 0; /* default irq = probe for it */
+ case 3: /* base,ctl,irq */
+ hwif->hw.irq = vals[2];
+ ide_init_hwif_ports(&hwif->hw, (ide_ioreg_t) vals[0], (ide_ioreg_t) vals[1], &hwif->irq);
+ memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+ hwif->irq = vals[2];
+ hwif->noprobe = 0;
+ hwif->chipset = ide_generic;
+ goto done;
+
+ case 0: goto bad_option;
+ default:
+ printk(" -- SUPPORT NOT CONFIGURED IN THIS KERNEL\n");
+ return 1;
+ }
+ }
+bad_option:
+ printk(" -- BAD OPTION\n");
+ return 1;
+bad_hwif:
+ printk("-- NOT SUPPORTED ON ide%d", hw);
+done:
+ printk("\n");
+ return 1;
+}
+
+/*
+ * probe_for_hwifs() finds/initializes "known" IDE interfaces
+ */
+static void __init probe_for_hwifs (void)
+{
+#ifdef CONFIG_PCI
+ if (pci_present())
+ {
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ ide_scan_pcibus(ide_scan_direction);
+#else
+#ifdef CONFIG_BLK_DEV_RZ1000
+ {
+ extern void ide_probe_for_rz100x(void);
+ ide_probe_for_rz100x();
+ }
+#endif /* CONFIG_BLK_DEV_RZ1000 */
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ }
+#endif /* CONFIG_PCI */
+
+#ifdef CONFIG_ETRAX_IDE
+ {
+ extern void init_e100_ide(void);
+ init_e100_ide();
+ }
+#endif /* CONFIG_ETRAX_IDE */
+#ifdef CONFIG_BLK_DEV_CMD640
+ {
+ extern void ide_probe_for_cmd640x(void);
+ ide_probe_for_cmd640x();
+ }
+#endif /* CONFIG_BLK_DEV_CMD640 */
+#ifdef CONFIG_BLK_DEV_PDC4030
+ {
+ extern int ide_probe_for_pdc4030(void);
+ (void) ide_probe_for_pdc4030();
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+#ifdef CONFIG_BLK_DEV_IDE_PMAC
+ {
+ extern void pmac_ide_probe(void);
+ pmac_ide_probe();
+ }
+#endif /* CONFIG_BLK_DEV_IDE_PMAC */
+#ifdef CONFIG_BLK_DEV_IDE_SWARM
+ {
+ extern void swarm_ide_probe(void);
+ swarm_ide_probe();
+ }
+#endif /* CONFIG_BLK_DEV_IDE_SWARM */
+#ifdef CONFIG_BLK_DEV_IDE_ICSIDE
+ {
+ extern void icside_init(void);
+ icside_init();
+ }
+#endif /* CONFIG_BLK_DEV_IDE_ICSIDE */
+#ifdef CONFIG_BLK_DEV_IDE_RAPIDE
+ {
+ extern void rapide_init(void);
+ rapide_init();
+ }
+#endif /* CONFIG_BLK_DEV_IDE_RAPIDE */
+#ifdef CONFIG_BLK_DEV_GAYLE
+ {
+ extern void gayle_init(void);
+ gayle_init();
+ }
+#endif /* CONFIG_BLK_DEV_GAYLE */
+#ifdef CONFIG_BLK_DEV_FALCON_IDE
+ {
+ extern void falconide_init(void);
+ falconide_init();
+ }
+#endif /* CONFIG_BLK_DEV_FALCON_IDE */
+#ifdef CONFIG_BLK_DEV_MAC_IDE
+ {
+ extern void macide_init(void);
+ macide_init();
+ }
+#endif /* CONFIG_BLK_DEV_MAC_IDE */
+#ifdef CONFIG_BLK_DEV_Q40IDE
+ {
+ extern void q40ide_init(void);
+ q40ide_init();
+ }
+#endif /* CONFIG_BLK_DEV_Q40IDE */
+#ifdef CONFIG_BLK_DEV_BUDDHA
+ {
+ extern void buddha_init(void);
+ buddha_init();
+ }
+#endif /* CONFIG_BLK_DEV_BUDDHA */
+#if defined(CONFIG_BLK_DEV_ISAPNP) && defined(CONFIG_ISAPNP)
+ {
+ extern void pnpide_init(int enable);
+ pnpide_init(1);
+ }
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+}
+
+void __init ide_init_builtin_drivers (void)
+{
+ /*
+ * Probe for special PCI and other "known" interface chipsets
+ */
+ probe_for_hwifs ();
+
+#ifdef CONFIG_BLK_DEV_IDE
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+ if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) {
+ ide_get_lock(&ide_lock, NULL, NULL); /* for atari only */
+ disable_irq(ide_hwifs[0].irq); /* disable_irq_nosync ?? */
+// disable_irq_nosync(ide_hwifs[0].irq);
+ }
+#endif /* __mc68000__ || CONFIG_APUS */
+
+ (void) ideprobe_init();
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+ if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) {
+ enable_irq(ide_hwifs[0].irq);
+ ide_release_lock(&ide_lock); /* for atari only */
+ }
+#endif /* __mc68000__ || CONFIG_APUS */
+#endif /* CONFIG_BLK_DEV_IDE */
+
+#ifdef CONFIG_PROC_FS
+ proc_ide_create();
+#endif
+
+ /*
+ * Attempt to match drivers for the available drives
+ */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+ (void) idedisk_init();
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDECD
+ (void) ide_cdrom_init();
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+ (void) idetape_init();
+#endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+ (void) idefloppy_init();
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDESCSI
+ #ifdef CONFIG_SCSI
+ (void) idescsi_init();
+ #else
+ #warning ide scsi-emulation selected but no SCSI-subsystem in kernel
+ #endif
+#endif /* CONFIG_BLK_DEV_IDESCSI */
+}
+
+static int default_cleanup (ide_drive_t *drive)
+{
+ return ide_unregister_subdriver(drive);
+}
+
+static int default_standby (ide_drive_t *drive)
+{
+ return 0;
+}
+
+static int default_flushcache (ide_drive_t *drive)
+{
+ return 0;
+}
+
+static ide_startstop_t default_do_request(ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+}
+
+static void default_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
+{
+ ide_end_request(uptodate, hwgroup);
+}
+
+static int default_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ return -EIO;
+}
+
+static int default_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+ drive->usage--;
+ return -EIO;
+}
+
+static void default_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+}
+
+static int default_check_media_change (ide_drive_t *drive)
+{
+ return 1;
+}
+
+static void default_pre_reset (ide_drive_t *drive)
+{
+}
+
+static unsigned long default_capacity (ide_drive_t *drive)
+{
+ return 0x7fffffff;
+}
+
+static ide_startstop_t default_special (ide_drive_t *drive)
+{
+ special_t *s = &drive->special;
+
+ s->all = 0;
+ drive->mult_req = 0;
+ return ide_stopped;
+}
+
+static int default_reinit (ide_drive_t *drive)
+{
+ printk(KERN_ERR "%s: does not support hotswap of device class!\n", drive->name);
+
+ return 0;
+}
+
+static void setup_driver_defaults (ide_drive_t *drive)
+{
+ ide_driver_t *d = drive->driver;
+
+ if (d->cleanup == NULL) d->cleanup = default_cleanup;
+ if (d->standby == NULL) d->standby = default_standby;
+ if (d->flushcache == NULL) d->flushcache = default_flushcache;
+ if (d->do_request == NULL) d->do_request = default_do_request;
+ if (d->end_request == NULL) d->end_request = default_end_request;
+ if (d->ioctl == NULL) d->ioctl = default_ioctl;
+ if (d->open == NULL) d->open = default_open;
+ if (d->release == NULL) d->release = default_release;
+ if (d->media_change == NULL) d->media_change = default_check_media_change;
+ if (d->pre_reset == NULL) d->pre_reset = default_pre_reset;
+ if (d->capacity == NULL) d->capacity = default_capacity;
+ if (d->special == NULL) d->special = default_special;
+ if (d->reinit == NULL) d->reinit = default_reinit;
+}
+
+ide_drive_t *ide_scan_devices (byte media, const char *name, ide_driver_t *driver, int n)
+{
+ unsigned int unit, index, i;
+
+ for (index = 0, i = 0; index < MAX_HWIFS; ++index) {
+ ide_hwif_t *hwif = &ide_hwifs[index];
+ if (!hwif->present)
+ continue;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ char *req = drive->driver_req;
+ if (*req && !strstr(name, req))
+ continue;
+ if (drive->present && drive->media == media && drive->driver == driver && ++i > n)
+ return drive;
+ }
+ }
+ return NULL;
+}
+
+int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version)
+{
+ unsigned long flags;
+
+ save_flags(flags); /* all CPUs */
+ cli(); /* all CPUs */
+ if (version != IDE_SUBDRIVER_VERSION || !drive->present || drive->driver != NULL || drive->busy || drive->usage) {
+ restore_flags(flags); /* all CPUs */
+ return 1;
+ }
+ drive->driver = driver;
+ setup_driver_defaults(drive);
+ restore_flags(flags); /* all CPUs */
+ if (drive->autotune != 2) {
+ if (driver->supports_dma && HWIF(drive)->dmaproc != NULL) {
+ /*
+ * Force DMAing for the beginning of the check.
+ * Some chipsets appear to do interesting things,
+ * if not checked and cleared.
+ * PARANOIA!!!
+ */
+ (void) (HWIF(drive)->dmaproc(ide_dma_off_quietly, drive));
+ (void) (HWIF(drive)->dmaproc(ide_dma_check, drive));
+ }
+ drive->dsc_overlap = (drive->next != drive && driver->supports_dsc_overlap);
+ drive->nice1 = 1;
+ }
+ drive->revalidate = 1;
+ drive->suspend_reset = 0;
+#ifdef CONFIG_PROC_FS
+ ide_add_proc_entries(drive->proc, generic_subdriver_entries, drive);
+ ide_add_proc_entries(drive->proc, driver->proc, drive);
+#endif
+ return 0;
+}
+
+int ide_unregister_subdriver (ide_drive_t *drive)
+{
+ unsigned long flags;
+
+ save_flags(flags); /* all CPUs */
+ cli(); /* all CPUs */
+ if (drive->usage || drive->busy || drive->driver == NULL || DRIVER(drive)->busy) {
+ restore_flags(flags); /* all CPUs */
+ return 1;
+ }
+#if defined(CONFIG_BLK_DEV_ISAPNP) && defined(CONFIG_ISAPNP) && defined(MODULE)
+ pnpide_init(0);
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+#ifdef CONFIG_PROC_FS
+ ide_remove_proc_entries(drive->proc, DRIVER(drive)->proc);
+ ide_remove_proc_entries(drive->proc, generic_subdriver_entries);
+#endif
+ auto_remove_settings(drive);
+ drive->driver = NULL;
+ restore_flags(flags); /* all CPUs */
+ return 0;
+}
+
+int ide_register_module (ide_module_t *module)
+{
+ ide_module_t *p = ide_modules;
+
+ while (p) {
+ if (p == module)
+ return 1;
+ p = p->next;
+ }
+ module->next = ide_modules;
+ ide_modules = module;
+ revalidate_drives();
+ return 0;
+}
+
+void ide_unregister_module (ide_module_t *module)
+{
+ ide_module_t **p;
+
+ for (p = &ide_modules; (*p) && (*p) != module; p = &((*p)->next));
+ if (*p)
+ *p = (*p)->next;
+}
+
+struct block_device_operations ide_fops[] = {{
+ open: ide_open,
+ release: ide_release,
+ ioctl: ide_ioctl,
+ check_media_change: ide_check_media_change,
+ revalidate: ide_revalidate_disk
+}};
+
+EXPORT_SYMBOL(ide_hwifs);
+EXPORT_SYMBOL(ide_register_module);
+EXPORT_SYMBOL(ide_unregister_module);
+EXPORT_SYMBOL(ide_spin_wait_hwgroup);
+
+/*
+ * Probe module
+ */
+#ifdef DEVFS_MUST_DIE
+devfs_handle_t ide_devfs_handle;
+#endif
+
+EXPORT_SYMBOL(ide_probe);
+EXPORT_SYMBOL(drive_is_flashcard);
+EXPORT_SYMBOL(ide_timer_expiry);
+EXPORT_SYMBOL(ide_intr);
+EXPORT_SYMBOL(ide_fops);
+EXPORT_SYMBOL(ide_get_queue);
+EXPORT_SYMBOL(ide_add_generic_settings);
+#ifdef DEVFS_MUST_DIE
+EXPORT_SYMBOL(ide_devfs_handle);
+#endif
+EXPORT_SYMBOL(do_ide_request);
+/*
+ * Driver module
+ */
+EXPORT_SYMBOL(ide_scan_devices);
+EXPORT_SYMBOL(ide_register_subdriver);
+EXPORT_SYMBOL(ide_unregister_subdriver);
+EXPORT_SYMBOL(ide_replace_subdriver);
+EXPORT_SYMBOL(ide_input_data);
+EXPORT_SYMBOL(ide_output_data);
+EXPORT_SYMBOL(atapi_input_bytes);
+EXPORT_SYMBOL(atapi_output_bytes);
+EXPORT_SYMBOL(drive_is_ready);
+EXPORT_SYMBOL(ide_set_handler);
+EXPORT_SYMBOL(ide_dump_status);
+EXPORT_SYMBOL(ide_error);
+EXPORT_SYMBOL(ide_fixstring);
+EXPORT_SYMBOL(ide_wait_stat);
+EXPORT_SYMBOL(ide_do_reset);
+EXPORT_SYMBOL(restart_request);
+EXPORT_SYMBOL(ide_init_drive_cmd);
+EXPORT_SYMBOL(ide_do_drive_cmd);
+EXPORT_SYMBOL(ide_end_drive_cmd);
+EXPORT_SYMBOL(ide_end_request);
+EXPORT_SYMBOL(ide_revalidate_disk);
+EXPORT_SYMBOL(ide_cmd);
+EXPORT_SYMBOL(ide_wait_cmd);
+EXPORT_SYMBOL(ide_wait_cmd_task);
+EXPORT_SYMBOL(ide_delay_50ms);
+EXPORT_SYMBOL(ide_stall_queue);
+#ifdef CONFIG_PROC_FS
+EXPORT_SYMBOL(ide_add_proc_entries);
+EXPORT_SYMBOL(ide_remove_proc_entries);
+EXPORT_SYMBOL(proc_ide_read_geometry);
+EXPORT_SYMBOL(create_proc_ide_interfaces);
+EXPORT_SYMBOL(recreate_proc_ide_device);
+EXPORT_SYMBOL(destroy_proc_ide_device);
+#endif
+EXPORT_SYMBOL(ide_add_setting);
+EXPORT_SYMBOL(ide_remove_setting);
+
+EXPORT_SYMBOL(ide_register_hw);
+EXPORT_SYMBOL(ide_register);
+EXPORT_SYMBOL(ide_unregister);
+EXPORT_SYMBOL(ide_setup_ports);
+EXPORT_SYMBOL(hwif_unregister);
+EXPORT_SYMBOL(get_info_ptr);
+EXPORT_SYMBOL(current_capacity);
+
+EXPORT_SYMBOL(system_bus_clock);
+
+EXPORT_SYMBOL(ide_reinit_drive);
+
+#if 0
+static int ide_notify_reboot (struct notifier_block *this, unsigned long event, void *x)
+{
+ ide_hwif_t *hwif;
+ ide_drive_t *drive;
+ int i, unit;
+
+ switch (event) {
+ case SYS_HALT:
+ case SYS_POWER_OFF:
+ case SYS_RESTART:
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ printk("flushing ide devices: ");
+
+ for (i = 0; i < MAX_HWIFS; i++) {
+ hwif = &ide_hwifs[i];
+ if (!hwif->present)
+ continue;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ drive = &hwif->drives[unit];
+ if (!drive->present)
+ continue;
+
+ /* set the drive to standby */
+ printk("%s ", drive->name);
+ if (event != SYS_RESTART)
+ if (drive->driver != NULL && DRIVER(drive)->standby(drive))
+ continue;
+
+ if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+ continue;
+ }
+ }
+ printk("\n");
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ide_notifier = {
+ ide_notify_reboot,
+ NULL,
+ 5
+};
+#endif
+
+/*
+ * This gets invoked once during initialization, to set *everything* up
+ */
+int __init ide_init (void)
+{
+ static char banner_printed;
+ int i;
+
+ if (!banner_printed) {
+ printk(KERN_INFO "Uniform Multi-Platform E-IDE driver " REVISION "\n");
+#ifdef DEVFS_MUST_DIE
+ ide_devfs_handle = devfs_mk_dir (NULL, "ide", NULL);
+#endif
+ system_bus_speed = ide_system_bus_speed();
+ banner_printed = 1;
+ }
+
+ init_ide_data ();
+
+ initializing = 1;
+ ide_init_builtin_drivers();
+ initializing = 0;
+
+ for (i = 0; i < MAX_HWIFS; ++i) {
+ ide_hwif_t *hwif = &ide_hwifs[i];
+ if (hwif->present)
+ ide_geninit(hwif);
+ }
+
+ /*register_reboot_notifier(&ide_notifier);*/
+ return 0;
+}
+
+#ifdef MODULE
+char *options = NULL;
+MODULE_PARM(options,"s");
+MODULE_LICENSE("GPL");
+
+static void __init parse_options (char *line)
+{
+ char *next = line;
+
+ if (line == NULL || !*line)
+ return;
+ while ((line = next) != NULL) {
+ if ((next = strchr(line,' ')) != NULL)
+ *next++ = 0;
+ if (!ide_setup(line))
+ printk ("Unknown option '%s'\n", line);
+ }
+}
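+
+/*
+ * Illustrative usage only -- the accepted keywords are whatever ide_setup()
+ * parses (the usual "idex=" / "hdx=" forms from the IDE documentation),
+ * e.g. when this is loaded as a module:
+ *
+ * insmod <ide module> options="ide0=0x1f0,0x3f6,14 hdb=flash"
+ *
+ * parse_options() splits the string on spaces and hands each word to
+ * ide_setup().
+ */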
+
+int init_module (void)
+{
+ parse_options(options);
+ return ide_init();
+}
+
+void cleanup_module (void)
+{
+ int index;
+
+ /*unregister_reboot_notifier(&ide_notifier);*/
+ for (index = 0; index < MAX_HWIFS; ++index) {
+ ide_unregister(index);
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+ if (ide_hwifs[index].dma_base)
+ (void) ide_release_dma(&ide_hwifs[index]);
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+ }
+
+#ifdef CONFIG_PROC_FS
+ proc_ide_destroy();
+#endif
+#ifdef DEVFS_MUST_DIE
+ devfs_unregister (ide_devfs_handle);
+#endif
+}
+
+#else /* !MODULE */
+
+__setup("", ide_setup);
+
+#endif /* MODULE */
diff --git a/xen/drivers/ide/ide_modes.h b/xen/drivers/ide/ide_modes.h
new file mode 100644
index 0000000000..16b8cf123a
--- /dev/null
+++ b/xen/drivers/ide/ide_modes.h
@@ -0,0 +1,236 @@
+/*
+ * linux/drivers/ide/ide_modes.h
+ *
+ * Copyright (C) 1996 Linus Torvalds, Igor Abramov, and Mark Lord
+ */
+
+#ifndef _IDE_MODES_H
+#define _IDE_MODES_H
+
+#include <linux/config.h>
+
+/*
+ * Shared data/functions for determining best PIO mode for an IDE drive.
+ * Most of this stuff originally lived in cmd640.c, and changes to the
+ * ide_pio_blacklist[] table should be made with EXTREME CAUTION to avoid
+ * breaking the fragile cmd640.c support.
+ */
+
+#ifdef CONFIG_BLK_DEV_IDE_MODES
+
+/*
+ * Standard (generic) timings for PIO modes, from ATA2 specification.
+ * These timings are for access to the IDE data port register *only*.
+ * Some drives may specify a mode, while also specifying a different
+ * value for cycle_time (from drive identification data).
+ */
+typedef struct ide_pio_timings_s {
+ int setup_time; /* Address setup (ns) minimum */
+ int active_time; /* Active pulse (ns) minimum */
+ int cycle_time; /* Cycle time (ns) minimum = (setup + active + recovery) */
+} ide_pio_timings_t;
+
+typedef struct ide_pio_data_s {
+ byte pio_mode;
+ byte use_iordy;
+ byte overridden;
+ byte blacklisted;
+ unsigned int cycle_time;
+} ide_pio_data_t;
+
+#ifndef _IDE_C
+
+int ide_scan_pio_blacklist (char *model);
+byte ide_get_best_pio_mode (ide_drive_t *drive, byte mode_wanted, byte max_mode, ide_pio_data_t *d);
+extern const ide_pio_timings_t ide_pio_timings[6];
+
+#else /* _IDE_C */
+
+const ide_pio_timings_t ide_pio_timings[6] = {
+ { 70, 165, 600 }, /* PIO Mode 0 */
+ { 50, 125, 383 }, /* PIO Mode 1 */
+ { 30, 100, 240 }, /* PIO Mode 2 */
+ { 30, 80, 180 }, /* PIO Mode 3 with IORDY */
+ { 25, 70, 120 }, /* PIO Mode 4 with IORDY */
+ { 20, 50, 100 } /* PIO Mode 5 with IORDY (nonstandard) */
+};
+
+/*
+ * Black list. Some drives incorrectly report their maximal PIO mode,
+ * at least with respect to CMD640. Here we keep info on some known drives.
+ */
+static struct ide_pio_info {
+ const char *name;
+ int pio;
+} ide_pio_blacklist [] = {
+/* { "Conner Peripherals 1275MB - CFS1275A", 4 }, */
+ { "Conner Peripherals 540MB - CFS540A", 3 },
+
+ { "WDC AC2700", 3 },
+ { "WDC AC2540", 3 },
+ { "WDC AC2420", 3 },
+ { "WDC AC2340", 3 },
+ { "WDC AC2250", 0 },
+ { "WDC AC2200", 0 },
+ { "WDC AC21200", 4 },
+ { "WDC AC2120", 0 },
+ { "WDC AC2850", 3 },
+ { "WDC AC1270", 3 },
+ { "WDC AC1170", 1 },
+ { "WDC AC1210", 1 },
+ { "WDC AC280", 0 },
+/* { "WDC AC21000", 4 }, */
+ { "WDC AC31000", 3 },
+ { "WDC AC31200", 3 },
+/* { "WDC AC31600", 4 }, */
+
+ { "Maxtor 7131 AT", 1 },
+ { "Maxtor 7171 AT", 1 },
+ { "Maxtor 7213 AT", 1 },
+ { "Maxtor 7245 AT", 1 },
+ { "Maxtor 7345 AT", 1 },
+ { "Maxtor 7546 AT", 3 },
+ { "Maxtor 7540 AV", 3 },
+
+ { "SAMSUNG SHD-3121A", 1 },
+ { "SAMSUNG SHD-3122A", 1 },
+ { "SAMSUNG SHD-3172A", 1 },
+
+/* { "ST51080A", 4 },
+ * { "ST51270A", 4 },
+ * { "ST31220A", 4 },
+ * { "ST31640A", 4 },
+ * { "ST32140A", 4 },
+ * { "ST3780A", 4 },
+ */
+ { "ST5660A", 3 },
+ { "ST3660A", 3 },
+ { "ST3630A", 3 },
+ { "ST3655A", 3 },
+ { "ST3391A", 3 },
+ { "ST3390A", 1 },
+ { "ST3600A", 1 },
+ { "ST3290A", 0 },
+ { "ST3144A", 0 },
+ { "ST3491A", 1 }, /* reports 3, should be 1 or 2 (depending on */
+ /* drive) according to Seagate's FIND-ATA program */
+
+ { "QUANTUM ELS127A", 0 },
+ { "QUANTUM ELS170A", 0 },
+ { "QUANTUM LPS240A", 0 },
+ { "QUANTUM LPS210A", 3 },
+ { "QUANTUM LPS270A", 3 },
+ { "QUANTUM LPS365A", 3 },
+ { "QUANTUM LPS540A", 3 },
+ { "QUANTUM LIGHTNING 540A", 3 },
+ { "QUANTUM LIGHTNING 730A", 3 },
+
+ { "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */
+ { "QUANTUM FIREBALL_640", 3 },
+ { "QUANTUM FIREBALL_1080", 3 },
+ { "QUANTUM FIREBALL_1280", 3 },
+ { NULL, 0 }
+};
+
+/*
+ * This routine searches the ide_pio_blacklist[] for an entry whose name
+ * is a prefix of the supplied model name.
+ *
+ * Returns -1 if no match found.
+ * Otherwise returns the recommended PIO mode from ide_pio_blacklist[].
+ */
+int ide_scan_pio_blacklist (char *model)
+{
+ struct ide_pio_info *p;
+
+ for (p = ide_pio_blacklist; p->name != NULL; p++) {
+ if (strncmp(p->name, model, strlen(p->name)) == 0)
+ return p->pio;
+ }
+ return -1;
+}
+
+/*
+ * This routine returns the recommended PIO settings for a given drive,
+ * based on the drive->id information and the ide_pio_blacklist[].
+ * This is used by most chipset support modules when "auto-tuning".
+ */
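+/*
+ * Typical caller usage (cf. piix_tune_drive() later in this patch):
+ *
+ * pio = ide_get_best_pio_mode(drive, 255, 5, NULL);
+ *
+ * where 255 means "no specific mode requested" and 5 caps the result.
+ */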
+
+/*
+ * Drive PIO mode auto selection
+ */
+byte ide_get_best_pio_mode (ide_drive_t *drive, byte mode_wanted, byte max_mode, ide_pio_data_t *d)
+{
+ int pio_mode;
+ int cycle_time = 0;
+ int use_iordy = 0;
+ struct hd_driveid* id = drive->id;
+ int overridden = 0;
+ int blacklisted = 0;
+
+ if (mode_wanted != 255) {
+ pio_mode = mode_wanted;
+ } else if (!drive->id) {
+ pio_mode = 0;
+ } else if ((pio_mode = ide_scan_pio_blacklist(id->model)) != -1) {
+ overridden = 1;
+ blacklisted = 1;
+ use_iordy = (pio_mode > 2);
+ } else {
+ pio_mode = id->tPIO;
+ if (pio_mode > 2) { /* 2 is maximum allowed tPIO value */
+ pio_mode = 2;
+ overridden = 1;
+ }
+ if (id->field_valid & 2) { /* drive implements ATA2? */
+ if (id->capability & 8) { /* drive supports use_iordy? */
+ use_iordy = 1;
+ cycle_time = id->eide_pio_iordy;
+ if (id->eide_pio_modes & 7) {
+ overridden = 0;
+ if (id->eide_pio_modes & 4)
+ pio_mode = 5;
+ else if (id->eide_pio_modes & 2)
+ pio_mode = 4;
+ else
+ pio_mode = 3;
+ }
+ } else {
+ cycle_time = id->eide_pio;
+ }
+ }
+
+#if 0
+ if (drive->id->major_rev_num & 0x0004) printk("ATA-2 ");
+#endif
+
+ /*
+ * Conservative "downgrade" for all pre-ATA2 drives
+ */
+ if (pio_mode && pio_mode < 4) {
+ pio_mode--;
+ overridden = 1;
+#if 0
+ use_iordy = (pio_mode > 2);
+#endif
+ if (cycle_time && cycle_time < ide_pio_timings[pio_mode].cycle_time)
+ cycle_time = 0; /* use standard timing */
+ }
+ }
+ if (pio_mode > max_mode) {
+ pio_mode = max_mode;
+ cycle_time = 0;
+ }
+ if (d) {
+ d->pio_mode = pio_mode;
+ d->cycle_time = cycle_time ? cycle_time : ide_pio_timings[pio_mode].cycle_time;
+ d->use_iordy = use_iordy;
+ d->overridden = overridden;
+ d->blacklisted = blacklisted;
+ }
+ return pio_mode;
+}
+
+#endif /* _IDE_C */
+#endif /* CONFIG_BLK_DEV_IDE_MODES */
+#endif /* _IDE_MODES_H */
diff --git a/xen/drivers/ide/piix.c b/xen/drivers/ide/piix.c
new file mode 100644
index 0000000000..fe538429f3
--- /dev/null
+++ b/xen/drivers/ide/piix.c
@@ -0,0 +1,542 @@
+/*
+ * linux/drivers/ide/piix.c Version 0.32 June 9, 2000
+ *
+ * Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer
+ * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
+ * May be copied or modified under the terms of the GNU General Public License
+ *
+ * PIO mode setting function for Intel chipsets.
+ * For use instead of BIOS settings.
+ *
+ * 40-41
+ * 42-43
+ *
+ * 41
+ * 43
+ *
+ * | PIO 0 | c0 | 80 | 0 | piix_tune_drive(drive, 0);
+ * | PIO 2 | SW2 | d0 | 90 | 4 | piix_tune_drive(drive, 2);
+ * | PIO 3 | MW1 | e1 | a1 | 9 | piix_tune_drive(drive, 3);
+ * | PIO 4 | MW2 | e3 | a3 | b | piix_tune_drive(drive, 4);
+ *
+ * sitre = word40 & 0x4000; primary
+ * sitre = word42 & 0x4000; secondary
+ *
+ * 44 8421|8421 hdd|hdb
+ *
+ * 48 8421 hdd|hdc|hdb|hda udma enabled
+ *
+ * 0001 hda
+ * 0010 hdb
+ * 0100 hdc
+ * 1000 hdd
+ *
+ * 4a 84|21 hdb|hda
+ * 4b 84|21 hdd|hdc
+ *
+ * ata-33/82371AB
+ * ata-33/82371EB
+ * ata-33/82801AB ata-66/82801AA
+ * 00|00 udma 0 00|00 reserved
+ * 01|01 udma 1 01|01 udma 3
+ * 10|10 udma 2 10|10 udma 4
+ * 11|11 reserved 11|11 reserved
+ *
+ * 54 8421|8421 ata66 drive|ata66 enable
+ *
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x40, &reg40);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x42, &reg42);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x44, &reg44);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x48, &reg48);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x4a, &reg4a);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x54, &reg54);
+ *
+ */
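+
+/*
+ * Illustrative sketch (not part of the driver): with the register 0x48
+ * bit layout above (0001=hda .. 1000=hdd), testing whether UDMA is
+ * enabled for drive number dn would look like:
+ *
+ * byte reg48;
+ * pci_read_config_byte(HWIF(drive)->pci_dev, 0x48, &reg48);
+ * udma_enabled = (reg48 >> dn) & 1;
+ *
+ * which mirrors the "u_flag = 1 << drive->dn" test in piix_tune_chipset().
+ */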
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/hdreg.h>
+#include <linux/ide.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+
+#include "ide_modes.h"
+
+#define PIIX_DEBUG_DRIVE_INFO 0
+
+#define DISPLAY_PIIX_TIMINGS
+
+#if defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS)
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+static int piix_get_info(char *, char **, off_t, int);
+extern int (*piix_display_info)(char *, char **, off_t, int); /* ide-proc.c */
+extern char *ide_media_verbose(ide_drive_t *);
+static struct pci_dev *bmide_dev;
+
+static int piix_get_info (char *buffer, char **addr, off_t offset, int count)
+{
+ char *p = buffer;
+ u32 bibma = pci_resource_start(bmide_dev, 4);
+ u16 reg40 = 0, psitre = 0, reg42 = 0, ssitre = 0;
+ u8 c0 = 0, c1 = 0;
+ u8 reg44 = 0, reg48 = 0, reg4a = 0, reg4b = 0, reg54 = 0, reg55 = 0;
+
+ switch(bmide_dev->device) {
+ case PCI_DEVICE_ID_INTEL_82801BA_8:
+ case PCI_DEVICE_ID_INTEL_82801BA_9:
+ case PCI_DEVICE_ID_INTEL_82801CA_10:
+ case PCI_DEVICE_ID_INTEL_82801CA_11:
+ case PCI_DEVICE_ID_INTEL_82801DB_11:
+ case PCI_DEVICE_ID_INTEL_82801E_11:
+ p += sprintf(p, "\n Intel PIIX4 Ultra 100 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82372FB_1:
+ case PCI_DEVICE_ID_INTEL_82801AA_1:
+ p += sprintf(p, "\n Intel PIIX4 Ultra 66 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82451NX:
+ case PCI_DEVICE_ID_INTEL_82801AB_1:
+ case PCI_DEVICE_ID_INTEL_82443MX_1:
+ case PCI_DEVICE_ID_INTEL_82371AB:
+ p += sprintf(p, "\n Intel PIIX4 Ultra 33 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82371SB_1:
+ p += sprintf(p, "\n Intel PIIX3 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82371MX:
+ p += sprintf(p, "\n Intel MPIIX Chipset.\n");
+ return p-buffer; /* => must be less than 4k! */
+ case PCI_DEVICE_ID_INTEL_82371FB_1:
+ case PCI_DEVICE_ID_INTEL_82371FB_0:
+ default:
+ p += sprintf(p, "\n Intel PIIX Chipset.\n");
+ break;
+ }
+
+ pci_read_config_word(bmide_dev, 0x40, &reg40);
+ pci_read_config_word(bmide_dev, 0x42, &reg42);
+ pci_read_config_byte(bmide_dev, 0x44, &reg44);
+ pci_read_config_byte(bmide_dev, 0x48, &reg48);
+ pci_read_config_byte(bmide_dev, 0x4a, &reg4a);
+ pci_read_config_byte(bmide_dev, 0x4b, &reg4b);
+ pci_read_config_byte(bmide_dev, 0x54, &reg54);
+ pci_read_config_byte(bmide_dev, 0x55, &reg55);
+
+ psitre = (reg40 & 0x4000) ? 1 : 0;
+ ssitre = (reg42 & 0x4000) ? 1 : 0;
+
+ /*
+ * At this point the byte registers at bibma+0x02 and bibma+0x0a (the
+ * bus master IDE status registers of the two channels) are what we
+ * investigate:
+ */
+ c0 = inb_p((unsigned short)bibma + 0x02);
+ c1 = inb_p((unsigned short)bibma + 0x0a);
+
+ p += sprintf(p, "--------------- Primary Channel ---------------- Secondary Channel -------------\n");
+ p += sprintf(p, " %sabled %sabled\n",
+ (c0&0x80) ? "dis" : " en",
+ (c1&0x80) ? "dis" : " en");
+ p += sprintf(p, "--------------- drive0 --------- drive1 -------- drive0 ---------- drive1 ------\n");
+ p += sprintf(p, "DMA enabled: %s %s %s %s\n",
+ (c0&0x20) ? "yes" : "no ",
+ (c0&0x40) ? "yes" : "no ",
+ (c1&0x20) ? "yes" : "no ",
+ (c1&0x40) ? "yes" : "no " );
+ p += sprintf(p, "UDMA enabled: %s %s %s %s\n",
+ (reg48&0x01) ? "yes" : "no ",
+ (reg48&0x02) ? "yes" : "no ",
+ (reg48&0x04) ? "yes" : "no ",
+ (reg48&0x08) ? "yes" : "no " );
+ p += sprintf(p, "UDMA enabled: %s %s %s %s\n",
+ ((reg54&0x11) && (reg55&0x10) && (reg4a&0x01)) ? "5" :
+ ((reg54&0x11) && (reg4a&0x02)) ? "4" :
+ ((reg54&0x11) && (reg4a&0x01)) ? "3" :
+ (reg4a&0x02) ? "2" :
+ (reg4a&0x01) ? "1" :
+ (reg4a&0x00) ? "0" : "X",
+ ((reg54&0x22) && (reg55&0x20) && (reg4a&0x10)) ? "5" :
+ ((reg54&0x22) && (reg4a&0x20)) ? "4" :
+ ((reg54&0x22) && (reg4a&0x10)) ? "3" :
+ (reg4a&0x20) ? "2" :
+ (reg4a&0x10) ? "1" :
+ (reg4a&0x00) ? "0" : "X",
+ ((reg54&0x44) && (reg55&0x40) && (reg4b&0x03)) ? "5" :
+ ((reg54&0x44) && (reg4b&0x02)) ? "4" :
+ ((reg54&0x44) && (reg4b&0x01)) ? "3" :
+ (reg4b&0x02) ? "2" :
+ (reg4b&0x01) ? "1" :
+ (reg4b&0x00) ? "0" : "X",
+ ((reg54&0x88) && (reg55&0x80) && (reg4b&0x30)) ? "5" :
+ ((reg54&0x88) && (reg4b&0x20)) ? "4" :
+ ((reg54&0x88) && (reg4b&0x10)) ? "3" :
+ (reg4b&0x20) ? "2" :
+ (reg4b&0x10) ? "1" :
+ (reg4b&0x00) ? "0" : "X");
+
+ p += sprintf(p, "UDMA\n");
+ p += sprintf(p, "DMA\n");
+ p += sprintf(p, "PIO\n");
+
+/*
+ * FIXME.... Add configuration junk data....blah blah......
+ */
+
+ return p-buffer; /* => must be less than 4k! */
+}
+#endif /* defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS) */
+
+/*
+ * Guards one-time registration of the /proc timing display
+ * (see pci_init_piix() below):
+ */
+
+byte piix_proc = 0;
+
+extern char *ide_xfer_verbose (byte xfer_rate);
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && defined(CONFIG_PIIX_TUNING)
+/*
+ * Map a DMA/UDMA/PIO transfer rate onto the PIO mode number used by
+ * piix_tune_drive() for timing setup.
+ */
+static byte piix_dma_2_pio (byte xfer_rate) {
+ switch(xfer_rate) {
+ case XFER_UDMA_5:
+ case XFER_UDMA_4:
+ case XFER_UDMA_3:
+ case XFER_UDMA_2:
+ case XFER_UDMA_1:
+ case XFER_UDMA_0:
+ case XFER_MW_DMA_2:
+ case XFER_PIO_4:
+ return 4;
+ case XFER_MW_DMA_1:
+ case XFER_PIO_3:
+ return 3;
+ case XFER_SW_DMA_2:
+ case XFER_PIO_2:
+ return 2;
+ case XFER_MW_DMA_0:
+ case XFER_SW_DMA_1:
+ case XFER_SW_DMA_0:
+ case XFER_PIO_1:
+ case XFER_PIO_0:
+ case XFER_PIO_SLOW:
+ default:
+ return 0;
+ }
+}
+#endif /* defined(CONFIG_BLK_DEV_IDEDMA) && (CONFIG_PIIX_TUNING) */
+
+/*
+ * Based on settings done by AMI BIOS
+ * (might be useful if drive is not registered in CMOS for any reason).
+ */
+static void piix_tune_drive (ide_drive_t *drive, byte pio)
+{
+ unsigned long flags;
+ u16 master_data;
+ byte slave_data;
+ int is_slave = (&HWIF(drive)->drives[1] == drive);
+ int master_port = HWIF(drive)->index ? 0x42 : 0x40;
+ int slave_port = 0x44;
+ /* ISP (IORDY sample point), RTC (recovery time) -- in PCI clocks */
+ byte timings[][2] = { { 0, 0 },
+ { 0, 0 },
+ { 1, 0 },
+ { 2, 1 },
+ { 2, 3 }, };
+
+ pio = ide_get_best_pio_mode(drive, pio, 5, NULL);
+ pci_read_config_word(HWIF(drive)->pci_dev, master_port, &master_data);
+ if (is_slave) {
+ master_data = master_data | 0x4000;
+ if (pio > 1)
+ /* enable PPE, IE and TIME */
+ master_data = master_data | 0x0070;
+ pci_read_config_byte(HWIF(drive)->pci_dev, slave_port, &slave_data);
+ slave_data = slave_data & (HWIF(drive)->index ? 0x0f : 0xf0);
+ slave_data = slave_data | (((timings[pio][0] << 2) | timings[pio][1])
+ << (HWIF(drive)->index ? 4 : 0));
+ } else {
+ master_data = master_data & 0xccf8;
+ if (pio > 1)
+ /* enable PPE, IE and TIME */
+ master_data = master_data | 0x0007;
+ master_data = master_data | (timings[pio][0] << 12) |
+ (timings[pio][1] << 8);
+ }
+ save_flags(flags);
+ cli();
+ pci_write_config_word(HWIF(drive)->pci_dev, master_port, master_data);
+ if (is_slave)
+ pci_write_config_byte(HWIF(drive)->pci_dev, slave_port, slave_data);
+ restore_flags(flags);
+}
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && defined(CONFIG_PIIX_TUNING)
+static int piix_tune_chipset (ide_drive_t *drive, byte speed)
+{
+ ide_hwif_t *hwif = HWIF(drive);
+ struct pci_dev *dev = hwif->pci_dev;
+ byte maslave = hwif->channel ? 0x42 : 0x40;
+ int a_speed = 3 << (drive->dn * 4);
+ int u_flag = 1 << drive->dn;
+ int v_flag = 0x01 << drive->dn;
+ int w_flag = 0x10 << drive->dn;
+ int u_speed = 0;
+ int err = 0;
+ int sitre;
+ short reg4042, reg44, reg48, reg4a, reg54;
+ byte reg55;
+
+ pci_read_config_word(dev, maslave, &reg4042);
+ sitre = (reg4042 & 0x4000) ? 1 : 0;
+ pci_read_config_word(dev, 0x44, &reg44);
+ pci_read_config_word(dev, 0x48, &reg48);
+ pci_read_config_word(dev, 0x4a, &reg4a);
+ pci_read_config_word(dev, 0x54, &reg54);
+ pci_read_config_byte(dev, 0x55, &reg55);
+
+ switch(speed) {
+ case XFER_UDMA_4:
+ case XFER_UDMA_2: u_speed = 2 << (drive->dn * 4); break;
+ case XFER_UDMA_5:
+ case XFER_UDMA_3:
+ case XFER_UDMA_1: u_speed = 1 << (drive->dn * 4); break;
+ case XFER_UDMA_0: u_speed = 0 << (drive->dn * 4); break;
+ case XFER_MW_DMA_2:
+ case XFER_MW_DMA_1:
+ case XFER_SW_DMA_2: break;
+ default: return -1;
+ }
+
+ if (speed >= XFER_UDMA_0) {
+ if (!(reg48 & u_flag))
+ pci_write_config_word(dev, 0x48, reg48|u_flag);
+ if (speed == XFER_UDMA_5) {
+ pci_write_config_byte(dev, 0x55, (byte) reg55|w_flag);
+ } else {
+ pci_write_config_byte(dev, 0x55, (byte) reg55 & ~w_flag);
+ }
+ if (!(reg4a & u_speed)) {
+ pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
+ pci_write_config_word(dev, 0x4a, reg4a|u_speed);
+ }
+ if (speed > XFER_UDMA_2) {
+ if (!(reg54 & v_flag)) {
+ pci_write_config_word(dev, 0x54, reg54|v_flag);
+ }
+ } else {
+ pci_write_config_word(dev, 0x54, reg54 & ~v_flag);
+ }
+ }
+ if (speed < XFER_UDMA_0) {
+ if (reg48 & u_flag)
+ pci_write_config_word(dev, 0x48, reg48 & ~u_flag);
+ if (reg4a & a_speed)
+ pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
+ if (reg54 & v_flag)
+ pci_write_config_word(dev, 0x54, reg54 & ~v_flag);
+ if (reg55 & w_flag)
+ pci_write_config_byte(dev, 0x55, (byte) reg55 & ~w_flag);
+ }
+
+ piix_tune_drive(drive, piix_dma_2_pio(speed));
+
+#if PIIX_DEBUG_DRIVE_INFO
+ printk("%s: %s drive%d\n", drive->name, ide_xfer_verbose(speed), drive->dn);
+#endif /* PIIX_DEBUG_DRIVE_INFO */
+ if (!drive->init_speed)
+ drive->init_speed = speed;
+ err = ide_config_drive_speed(drive, speed);
+ drive->current_speed = speed;
+ return err;
+}
+
+static int piix_config_drive_for_dma (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+ ide_hwif_t *hwif = HWIF(drive);
+ struct pci_dev *dev = hwif->pci_dev;
+ byte speed;
+
+ byte udma_66 = eighty_ninty_three(drive);
+ int ultra100 = ((dev->device == PCI_DEVICE_ID_INTEL_82801BA_8) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801BA_9) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801CA_10) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801CA_11) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801DB_11) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801E_11)) ? 1 : 0;
+ int ultra66 = ((ultra100) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801AA_1) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82372FB_1)) ? 1 : 0;
+ int ultra = ((ultra66) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82371AB) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82443MX_1) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82451NX) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801AB_1)) ? 1 : 0;
+
+ if ((id->dma_ultra & 0x0020) && (udma_66) && (ultra100)) {
+ speed = XFER_UDMA_5;
+ } else if ((id->dma_ultra & 0x0010) && (ultra)) {
+ speed = ((udma_66) && (ultra66)) ? XFER_UDMA_4 : XFER_UDMA_2;
+ } else if ((id->dma_ultra & 0x0008) && (ultra)) {
+ speed = ((udma_66) && (ultra66)) ? XFER_UDMA_3 : XFER_UDMA_1;
+ } else if ((id->dma_ultra & 0x0004) && (ultra)) {
+ speed = XFER_UDMA_2;
+ } else if ((id->dma_ultra & 0x0002) && (ultra)) {
+ speed = XFER_UDMA_1;
+ } else if ((id->dma_ultra & 0x0001) && (ultra)) {
+ speed = XFER_UDMA_0;
+ } else if (id->dma_mword & 0x0004) {
+ speed = XFER_MW_DMA_2;
+ } else if (id->dma_mword & 0x0002) {
+ speed = XFER_MW_DMA_1;
+ } else if (id->dma_1word & 0x0004) {
+ speed = XFER_SW_DMA_2;
+ } else {
+ speed = XFER_PIO_0 + ide_get_best_pio_mode(drive, 255, 5, NULL);
+ }
+
+ (void) piix_tune_chipset(drive, speed);
+
+ return ((int) ((id->dma_ultra >> 11) & 7) ? ide_dma_on :
+ ((id->dma_ultra >> 8) & 7) ? ide_dma_on :
+ ((id->dma_mword >> 8) & 7) ? ide_dma_on :
+ ((id->dma_1word >> 8) & 7) ? ide_dma_on :
+ ide_dma_off_quietly);
+}
+
+static void config_chipset_for_pio (ide_drive_t *drive)
+{
+ piix_tune_drive(drive, ide_get_best_pio_mode(drive, 255, 5, NULL));
+}
+
+static int config_drive_xfer_rate (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+ ide_dma_action_t dma_func = ide_dma_on;
+
+ if (id && (id->capability & 1) && HWIF(drive)->autodma) {
+ /* Consult the list of known "bad" drives */
+ if (ide_dmaproc(ide_dma_bad_drive, drive)) {
+ dma_func = ide_dma_off;
+ goto fast_ata_pio;
+ }
+ dma_func = ide_dma_off_quietly;
+ if (id->field_valid & 4) {
+ if (id->dma_ultra & 0x003F) {
+ /* Force if Capable UltraDMA */
+ dma_func = piix_config_drive_for_dma(drive);
+ if ((id->field_valid & 2) &&
+ (dma_func != ide_dma_on))
+ goto try_dma_modes;
+ }
+ } else if (id->field_valid & 2) {
+try_dma_modes:
+ if ((id->dma_mword & 0x0007) ||
+ (id->dma_1word & 0x007)) {
+ /* Force if Capable regular DMA modes */
+ dma_func = piix_config_drive_for_dma(drive);
+ if (dma_func != ide_dma_on)
+ goto no_dma_set;
+ }
+ } else if (ide_dmaproc(ide_dma_good_drive, drive)) {
+ if (id->eide_dma_time > 150) {
+ goto no_dma_set;
+ }
+ /* Consult the list of known "good" drives */
+ dma_func = piix_config_drive_for_dma(drive);
+ if (dma_func != ide_dma_on)
+ goto no_dma_set;
+ } else {
+ goto fast_ata_pio;
+ }
+ } else if ((id->capability & 8) || (id->field_valid & 2)) {
+fast_ata_pio:
+ dma_func = ide_dma_off_quietly;
+no_dma_set:
+ config_chipset_for_pio(drive);
+ }
+ return HWIF(drive)->dmaproc(dma_func, drive);
+}
+
+static int piix_dmaproc(ide_dma_action_t func, ide_drive_t *drive)
+{
+ switch (func) {
+ case ide_dma_check:
+ return config_drive_xfer_rate(drive);
+ default :
+ break;
+ }
+ /* Other cases are done by generic IDE-DMA code. */
+ return ide_dmaproc(func, drive);
+}
+#endif /* defined(CONFIG_BLK_DEV_IDEDMA) && (CONFIG_PIIX_TUNING) */
+
+unsigned int __init pci_init_piix (struct pci_dev *dev, const char *name)
+{
+#if defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS)
+ if (!piix_proc) {
+ piix_proc = 1;
+ bmide_dev = dev;
+ piix_display_info = &piix_get_info;
+ }
+#endif /* DISPLAY_PIIX_TIMINGS && CONFIG_PROC_FS */
+ return 0;
+}
+
+/*
+ * Sheesh, someone at Intel needs to go read the ATA-4/5 T13 standards:
+ * the cable-detect bits report per channel, not per device!!!
+ * Whether bit 13 of identify word 93 is set is determined later...
+ */
+unsigned int __init ata66_piix (ide_hwif_t *hwif)
+{
+ byte reg54h = 0, reg55h = 0, ata66 = 0;
+ byte mask = hwif->channel ? 0xc0 : 0x30;
+
+ pci_read_config_byte(hwif->pci_dev, 0x54, &reg54h);
+ pci_read_config_byte(hwif->pci_dev, 0x55, &reg55h);
+
+ ata66 = (reg54h & mask) ? 1 : 0;
+
+ return ata66;
+}
+
+void __init ide_init_piix (ide_hwif_t *hwif)
+{
+#ifndef CONFIG_IA64
+ if (!hwif->irq)
+ hwif->irq = hwif->channel ? 15 : 14;
+#endif /* CONFIG_IA64 */
+
+ if (hwif->pci_dev->device == PCI_DEVICE_ID_INTEL_82371MX) {
+ /* This is a painful system; best to let it self-tune for now */
+ return;
+ }
+
+ hwif->tuneproc = &piix_tune_drive;
+ hwif->drives[0].autotune = 1;
+ hwif->drives[1].autotune = 1;
+
+ if (!hwif->dma_base)
+ return;
+
+#ifndef CONFIG_BLK_DEV_IDEDMA
+ hwif->autodma = 0;
+#else /* CONFIG_BLK_DEV_IDEDMA */
+#ifdef CONFIG_PIIX_TUNING
+ if (!noautodma)
+ hwif->autodma = 1;
+ hwif->dmaproc = &piix_dmaproc;
+ hwif->speedproc = &piix_tune_chipset;
+#endif /* CONFIG_PIIX_TUNING */
+#endif /* !CONFIG_BLK_DEV_IDEDMA */
+}
diff --git a/xen/drivers/net/3c59x.c b/xen/drivers/net/3c59x.c
new file mode 100644
index 0000000000..9478920e5b
--- /dev/null
+++ b/xen/drivers/net/3c59x.c
@@ -0,0 +1,2989 @@
+/* EtherLinkXL.c: A 3Com EtherLink PCI III/XL ethernet driver for linux. */
+/*
+ Written 1996-1999 by Donald Becker.
+
+ This software may be used and distributed according to the terms
+ of the GNU General Public License, incorporated herein by reference.
+
+ This driver is for the 3Com "Vortex" and "Boomerang" series ethercards.
+ Members of the series include Fast EtherLink 3c590/3c592/3c595/3c597
+ and the EtherLink XL 3c900 and 3c905 cards.
+
+ Problem reports and questions should be directed to
+ vortex@scyld.com
+
+ The author may be reached as becker@scyld.com, or C/O
+ Scyld Computing Corporation
+ 410 Severn Ave., Suite 210
+ Annapolis MD 21403
+
+ Linux Kernel Additions:
+
+ 0.99H+lk0.9 - David S. Miller - softnet, PCI DMA updates
+ 0.99H+lk1.0 - Jeff Garzik <jgarzik@mandrakesoft.com>
+ Remove compatibility defines for kernel versions < 2.2.x.
+ Update for new 2.3.x module interface
+ LK1.1.2 (March 19, 2000)
+ * New PCI interface (jgarzik)
+
+ LK1.1.3 25 April 2000, Andrew Morton <andrewm@uow.edu.au>
+ - Merged with 3c575_cb.c
+ - Don't set RxComplete in boomerang interrupt enable reg
+ - spinlock in vortex_timer to protect mdio functions
+ - disable local interrupts around call to vortex_interrupt in
+ vortex_tx_timeout() (So vortex_interrupt can use spin_lock())
+ - Select window 3 in vortex_timer()'s write to Wn3_MAC_Ctrl
+ - In vortex_start_xmit(), move the lock to _after_ we've altered
+ vp->cur_tx and vp->tx_full. This defeats the race between
+ vortex_start_xmit() and vortex_interrupt which was identified
+ by Bogdan Costescu.
+ - Merged back support for six new cards from various sources
+ - Set vortex_have_pci if pci_module_init returns zero (fixes cardbus
+ insertion oops)
+ - Tell it that 3c905C has NWAY for 100bT autoneg
+ - Fix handling of SetStatusEnd in 'Too much work..' code, as
+ per 2.3.99's 3c575_cb (Dave Hinds).
+ - Split ISR into two for vortex & boomerang
+ - Fix MOD_INC/DEC races
+ - Handle resource allocation failures.
+ - Fix 3CCFE575CT LED polarity
+ - Make tx_interrupt_mitigation the default
+
+ LK1.1.4 25 April 2000, Andrew Morton <andrewm@uow.edu.au>
+ - Add extra TxReset to vortex_up() to fix 575_cb hotplug initialisation probs.
+ - Put vortex_info_tbl into __devinitdata
+ - In the vortex_error StatsFull HACK, disable stats in vp->intr_enable as well
+ as in the hardware.
+ - Increased the loop counter in issue_and_wait from 2,000 to 4,000.
+
+ LK1.1.5 28 April 2000, andrewm
+ - Added powerpc defines (John Daniel <jdaniel@etresoft.com> said these work...)
+ - Some extra diagnostics
+ - In vortex_error(), reset the Tx on maxCollisions. Otherwise most
+ chips usually get a Tx timeout.
+ - Added extra_reset module parm
+ - Replaced some inline timer manip with mod_timer
+ (François Romieu <Francois.Romieu@nic.fr>)
+ - In vortex_up(), don't make Wn3_config initialisation dependent upon has_nway
+ (this came across from 3c575_cb).
+
+ LK1.1.6 06 Jun 2000, andrewm
+ - Backed out the PPC defines.
+ - Use del_timer_sync(), mod_timer().
+ - Fix wrapped ulong comparison in boomerang_rx()
+ - Add IS_TORNADO, use it to suppress 3c905C checksum error msg
+ (Donald Becker, I Lee Hetherington <ilh@sls.lcs.mit.edu>)
+ - Replace union wn3_config with BFINS/BFEXT manipulation for
+ sparc64 (Pete Zaitcev, Peter Jones)
+ - In vortex_error, do_tx_reset and vortex_tx_timeout(Vortex):
+ do a netif_wake_queue() to better recover from errors. (Anders Pedersen,
+ Donald Becker)
+ - Print a warning on out-of-memory (rate limited to 1 per 10 secs)
+ - Added two more Cardbus 575 NICs: 5b57 and 6564 (Paul Wagland)
+
+ LK1.1.7 2 Jul 2000 andrewm
+ - Better handling of shared IRQs
+ - Reset the transmitter on a Tx reclaim error
+ - Fixed crash under OOM during vortex_open() (Mark Hemment)
+ - Fix Rx cessation problem during OOM (help from Mark Hemment)
+ - The spinlocks around the mdio access were blocking interrupts for 300uS.
+ Fix all this to use spin_lock_bh() within mdio_read/write
+ - Only write to TxFreeThreshold if it's a boomerang - other NICs don't
+ have one.
+ - Added 802.3x MAC-layer flow control support
+
+ LK1.1.8 13 Aug 2000 andrewm
+ - Ignore request_region() return value - already reserved if Cardbus.
+ - Merged some additional Cardbus flags from Don's 0.99Qk
+ - Some fixes for 3c556 (Fred Maciel)
+ - Fix for EISA initialisation (Jan Rekorajski)
+ - Renamed MII_XCVR_PWR and EEPROM_230 to align with 3c575_cb and D. Becker's drivers
+ - Fixed MII_XCVR_PWR for 3CCFE575CT
+ - Added INVERT_LED_PWR, used it.
+ - Backed out the extra_reset stuff
+
+ LK1.1.9 12 Sep 2000 andrewm
+ - Backed out the tx_reset_resume flags. It was a no-op.
+ - In vortex_error, don't reset the Tx on txReclaim errors
+ - In vortex_error, don't reset the Tx on maxCollisions errors.
+ Hence backed out all the DownListPtr logic here.
+ - In vortex_error, give Tornado cards a partial TxReset on
+ maxCollisions (David Hinds). Defined MAX_COLLISION_RESET for this.
+ - Redid some driver flags and device names based on pcmcia_cs-3.1.20.
+ - Fixed a bug where, if vp->tx_full is set when the interface
+ is downed, it remains set when the interface is upped. Bad
+ things happen.
+
+ LK1.1.10 17 Sep 2000 andrewm
+ - Added EEPROM_8BIT for 3c555 (Fred Maciel)
+ - Added experimental support for the 3c556B Laptop Hurricane (Louis Gerbarg)
+ - Add HAS_NWAY to "3c900 Cyclone 10Mbps TPO"
+
+ LK1.1.11 13 Nov 2000 andrewm
+ - Dump MOD_INC/DEC_USE_COUNT, use SET_MODULE_OWNER
+
+ LK1.1.12 1 Jan 2001 andrewm (2.4.0-pre1)
+ - Call pci_enable_device before we request our IRQ (Tobias Ringstrom)
+ - Add 3c590 PCI latency timer hack to vortex_probe1 (from 0.99Ra)
+ - Added extended issue_and_wait for the 3c905CX.
+ - Look for an MII on PHY index 24 first (3c905CX oddity).
+ - Add HAS_NWAY to 3cSOHO100-TX (Brett Frankenberger)
+ - Don't free skbs we don't own on oom path in vortex_open().
+
+ LK1.1.13 27 Jan 2001
+ - Added explicit `medialock' flag so we can truly
+ lock the media type down with `options'.
+ - "check ioremap return and some tidbits" (Arnaldo Carvalho de Melo <acme@conectiva.com.br>)
+ - Added and used EEPROM_NORESET for 3c556B PM resumes.
+ - Fixed leakage of vp->rx_ring.
+ - Break out separate HAS_HWCKSM device capability flag.
+ - Kill vp->tx_full (ANK)
+ - Merge zerocopy fragment handling (ANK?)
+
+ LK1.1.14 15 Feb 2001
+ - Enable WOL. Can be turned on with `enable_wol' module option.
+ - EISA and PCI initialisation fixes (jgarzik, Manfred Spraul)
+ - If a device's internalconfig register reports it has NWAY,
+ use it, even if autoselect is enabled.
+
+ LK1.1.15 6 June 2001 akpm
+ - Prevent double counting of received bytes (Lars Christensen)
+ - Add ethtool support (jgarzik)
+ - Add module parm descriptions (Andrzej M. Krzysztofowicz)
+ - Implemented alloc_etherdev() API
+ - Special-case the 'Tx error 82' message.
+
+ LK1.1.16 18 July 2001 akpm
+ - Make NETIF_F_SG dependent upon nr_free_highpages(), not on CONFIG_HIGHMEM
+ - Lessen verbosity of bootup messages
+ - Fix WOL - use new PM API functions.
+ - Use netif_running() instead of vp->open in suspend/resume.
+ - Don't reset the interface logic on open/close/rmmod. It upsets
+ autonegotiation, and hence DHCP (from 0.99T).
+ - Back out EEPROM_NORESET flag because of the above (we do it for all
+ NICs).
+ - Correct 3c982 identification string
+ - Rename wait_for_completion() to issue_and_wait() to avoid completion.h
+ clash.
+
+ - See http://www.uow.edu.au/~andrewm/linux/#3c59x-2.3 for more details.
+ - Also see Documentation/networking/vortex.txt
+*/
+
+/*
+ * FIXME: This driver _could_ support MTU changing, but doesn't. See Don's hamachi.c implementation
+ * as well as other drivers
+ *
+ * NOTE: If you make 'vortex_debug' a constant (#define vortex_debug 0) the driver shrinks by 2k
+ * due to dead code elimination. There will be some performance benefits from this due to
+ * elimination of all the tests and reduced cache footprint.
+ */
+
+
+#define DRV_NAME "3c59x"
+#define DRV_VERSION "LK1.1.16"
+#define DRV_RELDATE "19 July 2001"
+
+
+/* "Knobs" that adjust features and parameters. */
+/* Set the copy breakpoint for the copy-only-tiny-frames scheme.
+ Setting to > 1512 effectively disables this feature. */
+/*#ifndef __arm__
+static const int rx_copybreak = 200;
+#else*/
+/* ARM systems perform better by disregarding the bus-master
+ transfer capability of these cards. -- rmk */
+/*static const int rx_copybreak = 1513;
+#endif*/
+static const int rx_copybreak = 0; /* Xen doesn't copybreak in drivers. */
+
+/* Allow setting MTU to a larger size, bypassing the normal ethernet setup. */
+static const int mtu = 1500;
+/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+static int max_interrupt_work = 32;
+/* Tx timeout interval (millisecs) */
+static int watchdog = 5000;
+
+/* Allow aggregation of Tx interrupts. Saves CPU load at the cost
+ * of possible Tx stalls if the system is blocking interrupts
+ * somewhere else. Undefine this to disable.
+ */
+#define tx_interrupt_mitigation 1
+
+/* Put out somewhat more debugging messages. (0: no msg, 1 minimal .. 6). */
+#define vortex_debug debug
+#ifdef VORTEX_DEBUG
+static int vortex_debug = VORTEX_DEBUG;
+#else
+static int vortex_debug = 1;
+#endif
+
+#ifndef __OPTIMIZE__
+#error You must compile this file with the correct options!
+#error See the last lines of the source file.
+#error You must compile this driver with "-O".
+#endif
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/module.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/errno.h>
+//#include <linux/in.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/mii.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+//#include <linux/highmem.h>
+#include <asm/irq.h> /* For NR_IRQS only. */
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+/* A few values that may be tweaked. */
+/* Keep the ring sizes a power of two for efficiency. */
+#undef TX_RING_SIZE
+#undef RX_RING_SIZE
+#define TX_RING_SIZE 16
+#define RX_RING_SIZE 32
+#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/
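+/* A power-of-two ring size lets the index wrap with a cheap mask: e.g. an
+ entry computed as cur_tx % TX_RING_SIZE compiles down to an AND. */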
+
+/* Kernel compatibility defines, some common to David Hinds' PCMCIA package.
+ This is only in the support-all-kernels source code. */
+
+#define RUN_AT(x) (jiffies + (x))
+
+#include <linux/delay.h>
+
+
+static char version[] __devinitdata =
+DRV_NAME ": Donald Becker and others. www.scyld.com/network/vortex.html\n";
+
+MODULE_AUTHOR("Donald Becker <becker@scyld.com>");
+MODULE_DESCRIPTION("3Com 3c59x/3c9xx ethernet driver "
+ DRV_VERSION " " DRV_RELDATE);
+MODULE_LICENSE("GPL");
+
+MODULE_PARM(debug, "i");
+MODULE_PARM(options, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(full_duplex, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(hw_checksums, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(flow_ctrl, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(enable_wol, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(rx_copybreak, "i");
+MODULE_PARM(max_interrupt_work, "i");
+MODULE_PARM(compaq_ioaddr, "i");
+MODULE_PARM(compaq_irq, "i");
+MODULE_PARM(compaq_device_id, "i");
+MODULE_PARM(watchdog, "i");
+MODULE_PARM_DESC(debug, "3c59x debug level (0-6)");
+MODULE_PARM_DESC(options, "3c59x: Bits 0-3: media type, bit 4: bus mastering, bit 9: full duplex");
+MODULE_PARM_DESC(full_duplex, "3c59x full duplex setting(s) (1)");
+MODULE_PARM_DESC(hw_checksums, "3c59x Hardware checksum checking by adapter(s) (0-1)");
+MODULE_PARM_DESC(flow_ctrl, "3c59x 802.3x flow control usage (PAUSE only) (0-1)");
+MODULE_PARM_DESC(enable_wol, "3c59x: Turn on Wake-on-LAN for adapter(s) (0-1)");
+MODULE_PARM_DESC(rx_copybreak, "3c59x copy breakpoint for copy-only-tiny-frames");
+MODULE_PARM_DESC(max_interrupt_work, "3c59x maximum events handled per interrupt");
+MODULE_PARM_DESC(compaq_ioaddr, "3c59x PCI I/O base address (Compaq BIOS problem workaround)");
+MODULE_PARM_DESC(compaq_irq, "3c59x PCI IRQ number (Compaq BIOS problem workaround)");
+MODULE_PARM_DESC(compaq_device_id, "3c59x PCI device ID (Compaq BIOS problem workaround)");
+MODULE_PARM_DESC(watchdog, "3c59x transmit timeout in milliseconds");
+
+/* Operational parameters that are not usually changed. */
+
+/* The Vortex size is twice that of the original EtherLinkIII series: the
+ runtime register window, window 1, is now always mapped in.
+ The Boomerang size is twice as large as the Vortex -- it has additional
+ bus master control registers. */
+#define VORTEX_TOTAL_SIZE 0x20
+#define BOOMERANG_TOTAL_SIZE 0x40
+
+/* Set iff a MII transceiver on any interface requires mdio preamble.
+ This is only set with the original DP83840 on older 3c905 boards, so the
+ extra code size of a per-interface flag is not worthwhile. */
+static char mii_preamble_required;
+
+#define PFX DRV_NAME ": "
+
+
+
+/*
+ Theory of Operation
+
+I. Board Compatibility
+
+This device driver is designed for the 3Com FastEtherLink and FastEtherLink
+XL, 3Com's PCI to 10/100baseT adapters. It also works with the 10Mbps
+versions of the FastEtherLink cards. The supported product IDs are
+ 3c590, 3c592, 3c595, 3c597, 3c900, 3c905
+
+The related ISA 3c515 is supported with a separate driver, 3c515.c, included
+with the kernel source or available from
+ cesdis.gsfc.nasa.gov:/pub/linux/drivers/3c515.html
+
+II. Board-specific settings
+
+PCI bus devices are configured by the system at boot time, so no jumpers
+need to be set on the board. The system BIOS should be set to assign the
+PCI INTA signal to an otherwise unused system IRQ line.
+
+The EEPROM settings for media type and forced-full-duplex are observed.
+The EEPROM media type should be left at the default "autoselect" unless using
+10base2 or AUI connections which cannot be reliably detected.
+
+III. Driver operation
+
+The 3c59x series uses an interface that's very similar to the previous 3c5x9
+series. The primary interface is two programmed-I/O FIFOs, with an
+alternate single-contiguous-region bus-master transfer (see next).
+
+The 3c900 "Boomerang" series uses a full-bus-master interface with separate
+lists of transmit and receive descriptors, similar to the AMD LANCE/PCnet,
+DEC Tulip and Intel Speedo3. The first chip version retains a compatible
+programmed-I/O interface that has been removed in 'B' and subsequent board
+revisions.
+
+One extension that is advertised in a very large font is that the adapters
+are capable of being bus masters. On the Vortex chip this capability was
+only for a single contiguous region making it far less useful than the full
+bus master capability. There is a significant performance impact of taking
+an extra interrupt or polling for the completion of each transfer, as well
+as difficulty sharing the single transfer engine between the transmit and
+receive threads. Using DMA transfers is a win only with large blocks or
+with the flawed versions of the Intel Orion motherboard PCI controller.
+
+The Boomerang chip's full-bus-master interface is useful, and has the
+currently-unused advantages over other similar chips that queued transmit
+packets may be reordered and receive buffer groups are associated with a
+single frame.
+
+With full-bus-master support, this driver uses a "RX_COPYBREAK" scheme.
+Rather than a fixed intermediate receive buffer, this scheme allocates
+full-sized skbuffs as receive buffers. The value RX_COPYBREAK is used as
+the copying breakpoint: it is chosen to trade off the memory wasted by
+passing the full-sized skbuff to the queue layer for all frames against
+the cost of copying a frame to a correctly-sized skbuff.
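+
+As an illustrative sketch only (names simplified), the receive path
+roughly does:
+
+ if (pkt_len < rx_copybreak) {
+  skb = dev_alloc_skb(pkt_len + 2); // small, right-sized copy
+  memcpy(skb->data, rx_buf, pkt_len); // the ring keeps its buffer
+ } else {
+  skb = ring_skb;    // pass the big skbuff upstream
+  ring_skb = dev_alloc_skb(PKT_BUF_SZ); // refill the ring slot
+ }
+
+(Note: this Xen port sets rx_copybreak = 0, so the copy path is never taken.)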
+
+IIIC. Synchronization
+The driver runs as two independent, single-threaded flows of control. One
+is the send-packet routine, which enforces single-threaded use by the
+dev->tbusy flag. The other thread is the interrupt handler, which is single
+threaded by the hardware and other software.
+
+IV. Notes
+
+Thanks to Cameron Spitzer and Terry Murphy of 3Com for providing development
+3c590, 3c595, and 3c900 boards.
+The name "Vortex" is the internal 3Com project name for the PCI ASIC, and
+the EISA version is called "Demon". According to Terry these names come
+from rides at the local amusement park.
+
+The new chips support both ethernet (1.5K) and FDDI (4.5K) packet sizes!
+This driver only supports ethernet packets because of the skbuff allocation
+limit of 4K.
+*/
+
+/* This table drives the PCI probe routines. It's mostly boilerplate in all
+ of the drivers, and will likely be provided by some future kernel.
+*/
+enum pci_flags_bit {
+ PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4,
+ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3,
+};
+
+enum { IS_VORTEX=1, IS_BOOMERANG=2, IS_CYCLONE=4, IS_TORNADO=8,
+ EEPROM_8BIT=0x10, /* AKPM: Uses 0x230 as the base bitmaps for EEPROM reads */
+ HAS_PWR_CTRL=0x20, HAS_MII=0x40, HAS_NWAY=0x80, HAS_CB_FNS=0x100,
+ INVERT_MII_PWR=0x200, INVERT_LED_PWR=0x400, MAX_COLLISION_RESET=0x800,
+ EEPROM_OFFSET=0x1000, HAS_HWCKSM=0x2000 };
+
+enum vortex_chips {
+ CH_3C590 = 0,
+ CH_3C592,
+ CH_3C597,
+ CH_3C595_1,
+ CH_3C595_2,
+
+ CH_3C595_3,
+ CH_3C900_1,
+ CH_3C900_2,
+ CH_3C900_3,
+ CH_3C900_4,
+
+ CH_3C900_5,
+ CH_3C900B_FL,
+ CH_3C905_1,
+ CH_3C905_2,
+ CH_3C905B_1,
+
+ CH_3C905B_2,
+ CH_3C905B_FX,
+ CH_3C905C,
+ CH_3C980,
+ CH_3C9805,
+
+ CH_3CSOHO100_TX,
+ CH_3C555,
+ CH_3C556,
+ CH_3C556B,
+ CH_3C575,
+
+ CH_3C575_1,
+ CH_3CCFE575,
+ CH_3CCFE575CT,
+ CH_3CCFE656,
+ CH_3CCFEM656,
+
+ CH_3CCFEM656_1,
+ CH_3C450,
+};
+
+
+/* note: this array directly indexed by above enums, and MUST
+ * be kept in sync with both the enums above, and the PCI device
+ * table below
+ */
+static struct vortex_chip_info {
+ const char *name;
+ int flags;
+ int drv_flags;
+ int io_size;
+} vortex_info_tbl[] __devinitdata = {
+#define EISA_TBL_OFFSET 0 /* Offset of this entry for vortex_eisa_init */
+ {"3c590 Vortex 10Mbps",
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c592 EISA 10Mbps Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c597 EISA Fast Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c595 Vortex 100baseTx",
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c595 Vortex 100baseT4",
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+
+ {"3c595 Vortex 100base-MII",
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c900 Boomerang 10baseT",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, },
+ {"3c900 Boomerang 10Mbps Combo",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, },
+ {"3c900 Cyclone 10Mbps TPO", /* AKPM: from Don's 0.99M */
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+ {"3c900 Cyclone 10Mbps Combo",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+
+ {"3c900 Cyclone 10Mbps TPC", /* AKPM: from Don's 0.99M */
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+ {"3c900B-FL Cyclone 10base-FL",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+ {"3c905 Boomerang 100baseTx",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, },
+ {"3c905 Boomerang 100baseT4",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, },
+ {"3c905B Cyclone 100baseTx",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+
+ {"3c905B Cyclone 10/100/BNC",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+ {"3c905B-FX Cyclone 100baseFx",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+ {"3c905C Tornado",
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, },
+ {"3c980 Cyclone",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+ {"3c982 Dual Port Server Cyclone",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+
+ {"3cSOHO100-TX Hurricane",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+ {"3c555 Laptop Hurricane",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|EEPROM_8BIT|HAS_HWCKSM, 128, },
+ {"3c556 Laptop Tornado",
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_8BIT|HAS_CB_FNS|INVERT_MII_PWR|
+ HAS_HWCKSM, 128, },
+ {"3c556B Laptop Hurricane",
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_OFFSET|HAS_CB_FNS|INVERT_MII_PWR|
+ HAS_HWCKSM, 128, },
+ {"3c575 [Megahertz] 10/100 LAN CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, },
+
+ {"3c575 Boomerang CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, },
+ {"3CCFE575BT Cyclone CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|
+ INVERT_LED_PWR|HAS_HWCKSM, 128, },
+ {"3CCFE575CT Tornado CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+ MAX_COLLISION_RESET|HAS_HWCKSM, 128, },
+ {"3CCFE656 Cyclone CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+ INVERT_LED_PWR|HAS_HWCKSM, 128, },
+ {"3CCFEM656B Cyclone+Winmodem CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+ INVERT_LED_PWR|HAS_HWCKSM, 128, },
+
+ {"3CXFEM656C Tornado+Winmodem CardBus", /* From pcmcia-cs-3.1.5 */
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+ MAX_COLLISION_RESET|HAS_HWCKSM, 128, },
+ {"3c450 HomePNA Tornado", /* AKPM: from Don's 0.99Q */
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, },
+ {0,}, /* 0 terminated list. */
+};
+
+
+static struct pci_device_id vortex_pci_tbl[] __devinitdata = {
+ { 0x10B7, 0x5900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C590 },
+ { 0x10B7, 0x5920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C592 },
+ { 0x10B7, 0x5970, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C597 },
+ { 0x10B7, 0x5950, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_1 },
+ { 0x10B7, 0x5951, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_2 },
+
+ { 0x10B7, 0x5952, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_3 },
+ { 0x10B7, 0x9000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_1 },
+ { 0x10B7, 0x9001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_2 },
+ { 0x10B7, 0x9004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_3 },
+ { 0x10B7, 0x9005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_4 },
+
+ { 0x10B7, 0x9006, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_5 },
+ { 0x10B7, 0x900A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900B_FL },
+ { 0x10B7, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_1 },
+ { 0x10B7, 0x9051, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_2 },
+ { 0x10B7, 0x9055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_1 },
+
+ { 0x10B7, 0x9058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_2 },
+ { 0x10B7, 0x905A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_FX },
+ { 0x10B7, 0x9200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905C },
+ { 0x10B7, 0x9800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C980 },
+ { 0x10B7, 0x9805, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C9805 },
+
+ { 0x10B7, 0x7646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CSOHO100_TX },
+ { 0x10B7, 0x5055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C555 },
+ { 0x10B7, 0x6055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556 },
+ { 0x10B7, 0x6056, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556B },
+ { 0x10B7, 0x5b57, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575 },
+
+ { 0x10B7, 0x5057, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575_1 },
+ { 0x10B7, 0x5157, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575 },
+ { 0x10B7, 0x5257, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575CT },
+ { 0x10B7, 0x6560, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE656 },
+ { 0x10B7, 0x6562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656 },
+
+ { 0x10B7, 0x6564, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656_1 },
+ { 0x10B7, 0x4500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C450 },
+ {0,} /* 0 terminated list. */
+};
+MODULE_DEVICE_TABLE(pci, vortex_pci_tbl);
+
+
+/* Operational definitions.
+ These are not used by other compilation units and thus are not
+ exported in a ".h" file.
+
+ First the windows. There are eight register windows, with the command
+ and status registers available in each.
+ */
+#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD)
+#define EL3_CMD 0x0e
+#define EL3_STATUS 0x0e
+
+/* The top five bits written to EL3_CMD are a command, the lower
+ 11 bits are the parameter, if applicable.
+ Note that 11 parameter bits were fine for ethernet, but the new chip
+ can handle FDDI-length frames (~4500 octets) and parameters now count
+ 32-bit 'Dwords' rather than octets. */
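+
+/* For example, EL3WINDOW(3) above expands to
+ outw(SelectWindow + 3, ioaddr + EL3_CMD);
+ i.e. command bits 15:11 = 1 (SelectWindow, defined just below) and
+ parameter bits 10:0 = 3. */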
+
+enum vortex_cmd {
+ TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11,
+ RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11,
+ UpStall = 6<<11, UpUnstall = (6<<11)+1,
+ DownStall = (6<<11)+2, DownUnstall = (6<<11)+3,
+ RxDiscard = 8<<11, TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11,
+ FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11,
+ SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11,
+ SetTxThreshold = 18<<11, SetTxStart = 19<<11,
+ StartDMAUp = 20<<11, StartDMADown = (20<<11)+1, StatsEnable = 21<<11,
+ StatsDisable = 22<<11, StopCoax = 23<<11, SetFilterBit = 25<<11,};
+
+/* The SetRxFilter command accepts the following classes: */
+enum RxFilter {
+ RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 };
+
+/* Bits in the general status register. */
+enum vortex_status {
+ IntLatch = 0x0001, HostError = 0x0002, TxComplete = 0x0004,
+ TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020,
+ IntReq = 0x0040, StatsFull = 0x0080,
+ DMADone = 1<<8, DownComplete = 1<<9, UpComplete = 1<<10,
+ DMAInProgress = 1<<11, /* DMA controller is still busy.*/
+ CmdInProgress = 1<<12, /* EL3_CMD is still busy.*/
+};
+
+/* Register window 1 offsets, the window used in normal operation.
+ On the Vortex this window is always mapped at offsets 0x10-0x1f. */
+enum Window1 {
+ TX_FIFO = 0x10, RX_FIFO = 0x10, RxErrors = 0x14,
+ RxStatus = 0x18, Timer=0x1A, TxStatus = 0x1B,
+ TxFree = 0x1C, /* Remaining free bytes in Tx buffer. */
+};
+enum Window0 {
+ Wn0EepromCmd = 10, /* Window 0: EEPROM command register. */
+ Wn0EepromData = 12, /* Window 0: EEPROM results register. */
+ IntrStatus=0x0E, /* Valid in all windows. */
+};
+enum Win0_EEPROM_bits {
+ EEPROM_Read = 0x80, EEPROM_WRITE = 0x40, EEPROM_ERASE = 0xC0,
+ EEPROM_EWENB = 0x30, /* Enable erasing/writing for 10 msec. */
+ EEPROM_EWDIS = 0x00, /* Disable EWENB before 10 msec timeout. */
+};
+/* EEPROM locations. */
+enum eeprom_offset {
+ PhysAddr01=0, PhysAddr23=1, PhysAddr45=2, ModelID=3,
+ EtherLink3ID=7, IFXcvrIO=8, IRQLine=9,
+ NodeAddr01=10, NodeAddr23=11, NodeAddr45=12,
+ DriverTune=13, Checksum=15};
+
+enum Window2 { /* Window 2. */
+ Wn2_ResetOptions=12,
+};
+enum Window3 { /* Window 3: MAC/config bits. */
+ Wn3_Config=0, Wn3_MAC_Ctrl=6, Wn3_Options=8,
+};
+
+#define BFEXT(value, offset, bitcount) \
+ ((((unsigned long)(value)) >> (offset)) & ((1 << (bitcount)) - 1))
+
+#define BFINS(lhs, rhs, offset, bitcount) \
+ (((lhs) & ~((((1 << (bitcount)) - 1)) << (offset))) | \
+ (((rhs) & ((1 << (bitcount)) - 1)) << (offset)))
+
+#define RAM_SIZE(v) BFEXT(v, 0, 3)
+#define RAM_WIDTH(v) BFEXT(v, 3, 1)
+#define RAM_SPEED(v) BFEXT(v, 4, 2)
+#define ROM_SIZE(v) BFEXT(v, 6, 2)
+#define RAM_SPLIT(v) BFEXT(v, 16, 2)
+#define XCVR(v) BFEXT(v, 20, 4)
+#define AUTOSELECT(v) BFEXT(v, 24, 1)
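+
+/* Illustration only: BFINS(config, XCVR_MII, 20, 4) would write transceiver
+ type XCVR_MII into the same 4-bit field that XCVR() above extracts. */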
+
+enum Window4 { /* Window 4: Xcvr/media bits. */
+ Wn4_FIFODiag = 4, Wn4_NetDiag = 6, Wn4_PhysicalMgmt=8, Wn4_Media = 10,
+};
+enum Win4_Media_bits {
+ Media_SQE = 0x0008, /* Enable SQE error counting for AUI. */
+ Media_10TP = 0x00C0, /* Enable link beat and jabber for 10baseT. */
+ Media_Lnk = 0x0080, /* Enable just link beat for 100TX/100FX. */
+ Media_LnkBeat = 0x0800,
+};
+enum Window7 { /* Window 7: Bus Master control. */
+ Wn7_MasterAddr = 0, Wn7_MasterLen = 6, Wn7_MasterStatus = 12,
+};
+/* Boomerang bus master control registers. */
+enum MasterCtrl {
+ PktStatus = 0x20, DownListPtr = 0x24, FragAddr = 0x28, FragLen = 0x2c,
+ TxFreeThreshold = 0x2f, UpPktStatus = 0x30, UpListPtr = 0x38,
+};
+
+/* The Rx and Tx descriptor lists.
+ Caution, Alpha hackers: these types are 32 bits! Note also the 8-byte
+ alignment constraint on tx_ring[] and rx_ring[]. */
+#define LAST_FRAG 0x80000000 /* Last Addr/Len pair in descriptor. */
+#define DN_COMPLETE 0x00010000 /* This packet has been downloaded */
+struct boom_rx_desc {
+ u32 next; /* Last entry points to 0. */
+ s32 status;
+ u32 addr; /* Up to 63 addr/len pairs possible. */
+ s32 length; /* Set LAST_FRAG to indicate last pair. */
+};
+/* Values for the Rx status entry. */
+enum rx_desc_status {
+ RxDComplete=0x00008000, RxDError=0x4000,
+ /* See boomerang_rx() for actual error bits */
+ IPChksumErr=1<<25, TCPChksumErr=1<<26, UDPChksumErr=1<<27,
+ IPChksumValid=1<<29, TCPChksumValid=1<<30, UDPChksumValid=1<<31,
+};
+
+#ifdef MAX_SKB_FRAGS
+#define DO_ZEROCOPY 1
+#else
+#define DO_ZEROCOPY 0
+#endif
+
+struct boom_tx_desc {
+ u32 next; /* Last entry points to 0. */
+ s32 status; /* bits 0:12 length, others see below. */
+#if DO_ZEROCOPY
+ struct {
+ u32 addr;
+ s32 length;
+ } frag[1+MAX_SKB_FRAGS];
+#else
+ u32 addr;
+ s32 length;
+#endif
+};
+
+/* Values for the Tx status entry. */
+enum tx_desc_status {
+ CRCDisable=0x2000, TxDComplete=0x8000,
+ AddIPChksum=0x02000000, AddTCPChksum=0x04000000, AddUDPChksum=0x08000000,
+ TxIntrUploaded=0x80000000, /* IRQ when in FIFO, but maybe not sent. */
+};
+
+/* Chip features we care about in vp->capabilities, read from the EEPROM. */
+enum ChipCaps { CapBusMaster=0x20, CapPwrMgmt=0x2000 };
+
+struct vortex_private {
+ /* The Rx and Tx rings should be quad-word-aligned. */
+ struct boom_rx_desc* rx_ring;
+ struct boom_tx_desc* tx_ring;
+ dma_addr_t rx_ring_dma;
+ dma_addr_t tx_ring_dma;
+ /* The addresses of transmit- and receive-in-place skbuffs. */
+ struct sk_buff* rx_skbuff[RX_RING_SIZE];
+ struct sk_buff* tx_skbuff[TX_RING_SIZE];
+ struct net_device *next_module; /* NULL if PCI device */
+ unsigned int cur_rx, cur_tx; /* The next free ring entry */
+ unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */
+ struct net_device_stats stats;
+ struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl. */
+ dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */
+
+ /* PCI configuration space information. */
+ struct pci_dev *pdev;
+ char *cb_fn_base; /* CardBus function status addr space. */
+
+ /* Some values here only for performance evaluation and path-coverage */
+ int rx_nocopy, rx_copy, queued_packet, rx_csumhits;
+ int card_idx;
+
+ /* The remainder are related to chip state, mostly media selection. */
+ struct timer_list timer; /* Media selection timer. */
+ struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */
+ int options; /* User-settable misc. driver options. */
+ unsigned int media_override:4, /* Passed-in media type. */
+ default_media:4, /* Read from the EEPROM/Wn3_Config. */
+ full_duplex:1, force_fd:1, autoselect:1,
+ bus_master:1, /* Vortex can only do a fragment bus-m. */
+ full_bus_master_tx:1, full_bus_master_rx:2, /* Boomerang */
+ flow_ctrl:1, /* Use 802.3x flow control (PAUSE only) */
+ partner_flow_ctrl:1, /* Partner supports flow control */
+ has_nway:1,
+ enable_wol:1, /* Wake-on-LAN is enabled */
+ pm_state_valid:1, /* power_state[] has sane contents */
+ open:1,
+ medialock:1,
+ must_free_region:1; /* Flag: if zero, Cardbus owns the I/O region */
+ int drv_flags;
+ u16 status_enable;
+ u16 intr_enable;
+ u16 available_media; /* From Wn3_Options. */
+ u16 capabilities, info1, info2; /* Various, from EEPROM. */
+ u16 advertising; /* NWay media advertisement */
+ unsigned char phys[2]; /* MII device addresses. */
+ u16 deferred; /* Resend these interrupts when we
+ * bail out of the ISR */
+ u16 io_size; /* Size of PCI region (for release_region) */
+ spinlock_t lock; /* Serialise access to device & its vortex_private */
+ spinlock_t mdio_lock; /* Serialise access to mdio hardware */
+ u32 power_state[16];
+};
+
+/* The action to take with a media selection timer tick.
+ Note that we deviate from the 3Com order by checking 10base2 before AUI.
+ */
+enum xcvr_types {
+ XCVR_10baseT=0, XCVR_AUI, XCVR_10baseTOnly, XCVR_10base2, XCVR_100baseTx,
+ XCVR_100baseFx, XCVR_MII=6, XCVR_NWAY=8, XCVR_ExtMII=9, XCVR_Default=10,
+};
+
+static struct media_table {
+ char *name;
+ unsigned int media_bits:16, /* Bits to set in Wn4_Media register. */
+ mask:8, /* The transceiver-present bit in Wn3_Config.*/
+ next:8; /* The media type to try next. */
+ int wait; /* Time before we check media status. */
+} media_tbl[] = {
+ { "10baseT", Media_10TP,0x08, XCVR_10base2, (14*HZ)/10},
+ { "10Mbs AUI", Media_SQE, 0x20, XCVR_Default, (1*HZ)/10},
+ { "undefined", 0, 0x80, XCVR_10baseT, 10000},
+ { "10base2", 0, 0x10, XCVR_AUI, (1*HZ)/10},
+ { "100baseTX", Media_Lnk, 0x02, XCVR_100baseFx, (14*HZ)/10},
+ { "100baseFX", Media_Lnk, 0x04, XCVR_MII, (14*HZ)/10},
+ { "MII", 0, 0x41, XCVR_10baseT, 3*HZ },
+ { "undefined", 0, 0x01, XCVR_10baseT, 10000},
+ { "Autonegotiate", 0, 0x41, XCVR_10baseT, 3*HZ},
+ { "MII-External", 0, 0x41, XCVR_10baseT, 3*HZ },
+ { "Default", 0, 0xFF, XCVR_10baseT, 10000},
+};
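+
+/*
+ * Illustrative sketch (not part of the driver): the entries above chain
+ * together through their 'next' fields, so autoselection is just a walk
+ * along that chain, skipping transceivers whose 'mask' bit is absent from
+ * the Wn3_Options capability word:
+ *
+ *   int port = XCVR_100baseTx;
+ *   while (!(available_media & media_tbl[port].mask))
+ *       port = media_tbl[port].next;
+ *
+ * This is exactly the loop vortex_up() and vortex_timer() run below.
+ */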
+
+static int vortex_probe1(struct pci_dev *pdev, long ioaddr, int irq,
+ int chip_idx, int card_idx);
+static void vortex_up(struct net_device *dev);
+static void vortex_down(struct net_device *dev);
+static int vortex_open(struct net_device *dev);
+static void mdio_sync(long ioaddr, int bits);
+static int mdio_read(struct net_device *dev, int phy_id, int location);
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
+static void vortex_timer(unsigned long arg);
+static void rx_oom_timer(unsigned long arg);
+static int vortex_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static int boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static int vortex_rx(struct net_device *dev);
+static int boomerang_rx(struct net_device *dev);
+static void vortex_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void boomerang_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static int vortex_close(struct net_device *dev);
+static void dump_tx_ring(struct net_device *dev);
+static void update_stats(long ioaddr, struct net_device *dev);
+static struct net_device_stats *vortex_get_stats(struct net_device *dev);
+static void set_rx_mode(struct net_device *dev);
+static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static void vortex_tx_timeout(struct net_device *dev);
+static void acpi_set_WOL(struct net_device *dev);
+
+/* This driver uses 'options' to pass the media type, full-duplex flag, etc. */
+/* Option count limit only -- unlimited interfaces are supported. */
+#define MAX_UNITS 8
+static int options[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1,};
+static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int hw_checksums[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int flow_ctrl[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int enable_wol[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
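+
+/*
+ * Usage sketch (an assumption -- based only on the arrays above, not on
+ * anything shown here): each array is indexed by card, so a hypothetical
+ * two-card load might look like
+ *
+ *   insmod 3c59x options=4,0 full_duplex=1,-1 enable_wol=1,-1
+ *
+ * with -1, the default, meaning "leave that card's setting alone".
+ */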
+
+/* #define dev_alloc_skb dev_alloc_skb_debug */
+
+/* A list of all installed Vortex EISA devices, for removing the driver module. */
+static struct net_device *root_vortex_eisa_dev;
+
+/* Variables to work-around the Compaq PCI BIOS32 problem. */
+static int compaq_ioaddr, compaq_irq, compaq_device_id = 0x5900;
+
+static int vortex_cards_found;
+
+#ifdef CONFIG_PM
+
+static int vortex_suspend (struct pci_dev *pdev, u32 state)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ if (dev && dev->priv) {
+ if (netif_running(dev)) {
+ netif_device_detach(dev);
+ vortex_down(dev);
+ }
+ }
+ return 0;
+}
+
+static int vortex_resume (struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ if (dev && dev->priv) {
+ if (netif_running(dev)) {
+ vortex_up(dev);
+ netif_device_attach(dev);
+ }
+ }
+ return 0;
+}
+
+#endif /* CONFIG_PM */
+
+/* returns count found (>= 0), or negative on error */
+static int __init vortex_eisa_init (void)
+{
+ long ioaddr;
+ int rc;
+ int orig_cards_found = vortex_cards_found;
+
+ /* Now check all slots of the EISA bus. */
+ if (!EISA_bus)
+ return 0;
+
+ for (ioaddr = 0x1000; ioaddr < 0x9000; ioaddr += 0x1000) {
+ int device_id;
+
+ if (request_region(ioaddr, VORTEX_TOTAL_SIZE, DRV_NAME) == NULL)
+ continue;
+
+ /* Check the standard EISA ID register for an encoded '3Com'. */
+ if (inw(ioaddr + 0xC80) != 0x6d50) {
+ release_region (ioaddr, VORTEX_TOTAL_SIZE);
+ continue;
+ }
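+ /*
+ * Aside (an assumption, for illustration): 0x6d50 is the byte-swapped
+ * compressed-ASCII form of 3Com's EISA manufacturer ID "TCM". Each
+ * letter packs into 5 bits as (ch - 'A' + 1), so 'T','C','M' gives
+ * (20 << 10) | (3 << 5) | 13 == 0x506d, which inw() reads low byte
+ * first.
+ */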
+
+ /* Check for a product that we support, 3c59{2,7} any rev. */
+ device_id = (inb(ioaddr + 0xC82)<<8) + inb(ioaddr + 0xC83);
+ if ((device_id & 0xFF00) != 0x5900) {
+ release_region (ioaddr, VORTEX_TOTAL_SIZE);
+ continue;
+ }
+
+ rc = vortex_probe1(NULL, ioaddr, inw(ioaddr + 0xC88) >> 12,
+ EISA_TBL_OFFSET, vortex_cards_found);
+ if (rc == 0)
+ vortex_cards_found++;
+ else
+ release_region (ioaddr, VORTEX_TOTAL_SIZE);
+ }
+
+ /* Special code to work-around the Compaq PCI BIOS32 problem. */
+ if (compaq_ioaddr) {
+ vortex_probe1(NULL, compaq_ioaddr, compaq_irq,
+ compaq_device_id, vortex_cards_found++);
+ }
+
+ return vortex_cards_found - orig_cards_found;
+}
+
+/* returns count (>= 0), or negative on error */
+static int __devinit vortex_init_one (struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int rc;
+
+ /* wake up and enable device */
+ if (pci_enable_device (pdev)) {
+ rc = -EIO;
+ } else {
+ rc = vortex_probe1 (pdev, pci_resource_start (pdev, 0), pdev->irq,
+ ent->driver_data, vortex_cards_found);
+ if (rc == 0)
+ vortex_cards_found++;
+ }
+ return rc;
+}
+
+/*
+ * Start up the PCI device which is described by *pdev.
+ * Return 0 on success.
+ *
+ * NOTE: pdev can be NULL, for the case of an EISA driver
+ */
+static int __devinit vortex_probe1(struct pci_dev *pdev,
+ long ioaddr, int irq,
+ int chip_idx, int card_idx)
+{
+ struct vortex_private *vp;
+ int option;
+ unsigned int eeprom[0x40], checksum = 0; /* EEPROM contents */
+ int i, step;
+ struct net_device *dev;
+ static int printed_version;
+ int retval, print_info;
+ struct vortex_chip_info * const vci = &vortex_info_tbl[chip_idx];
+ char *print_name;
+
+ if (!printed_version) {
+ printk (version);
+ printed_version = 1;
+ }
+
+ print_name = pdev ? pdev->slot_name : "3c59x";
+
+ dev = alloc_etherdev(sizeof(*vp));
+ retval = -ENOMEM;
+ if (!dev) {
+ printk (KERN_ERR PFX "unable to allocate etherdev, aborting\n");
+ goto out;
+ }
+ SET_MODULE_OWNER(dev);
+ vp = dev->priv;
+
+ /* The lower four bits are the media type. */
+ if (dev->mem_start) {
+ /*
+ * For non-modular use, the 'options' param is passed in as the
+ * third numeric argument of the LILO 'ether=' boot option.
+ */
+ option = dev->mem_start;
+ }
+ else if (card_idx < MAX_UNITS)
+ option = options[card_idx];
+ else
+ option = -1;
+
+ if (option > 0) {
+ if (option & 0x8000)
+ vortex_debug = 7;
+ if (option & 0x4000)
+ vortex_debug = 2;
+ if (option & 0x0400)
+ vp->enable_wol = 1;
+ }
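+
+ /*
+ * Summary of the 'options' word, as decoded here and in the media
+ * handling further down (derived from this function only): bits 3:0
+ * media type, bit 4 bus-master, bit 9 full duplex, bit 10 enable WOL,
+ * bit 14 debug level 2, bit 15 debug level 7.
+ */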
+
+ print_info = (vortex_debug > 1);
+ if (print_info)
+ printk (KERN_INFO "See Documentation/networking/vortex.txt\n");
+
+ printk(KERN_INFO "%s: 3Com %s %s at 0x%lx. Vers " DRV_VERSION "\n",
+ print_name,
+ pdev ? "PCI" : "EISA",
+ vci->name,
+ ioaddr);
+
+ dev->base_addr = ioaddr;
+ dev->irq = irq;
+ dev->mtu = mtu;
+ vp->drv_flags = vci->drv_flags;
+ vp->has_nway = (vci->drv_flags & HAS_NWAY) ? 1 : 0;
+ vp->io_size = vci->io_size;
+ vp->card_idx = card_idx;
+
+ /* module list only for EISA devices */
+ if (pdev == NULL) {
+ vp->next_module = root_vortex_eisa_dev;
+ root_vortex_eisa_dev = dev;
+ }
+
+ /* PCI-only startup logic */
+ if (pdev) {
+ /* EISA resources already marked, so only PCI needs to do this here */
+ /* Ignore return value, because Cardbus drivers already allocate for us */
+ if (request_region(ioaddr, vci->io_size, print_name) != NULL)
+ vp->must_free_region = 1;
+
+ /* enable bus-mastering if necessary */
+ if (vci->flags & PCI_USES_MASTER)
+ pci_set_master (pdev);
+
+ if (vci->drv_flags & IS_VORTEX) {
+ u8 pci_latency;
+ u8 new_latency = 248;
+
+ /* Check the PCI latency value. On the 3c590 series the latency timer
+ must be set to the maximum value to avoid data corruption that occurs
+ when the timer expires during a transfer. This bug exists in the
+ Vortex chip only. */
+ pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &pci_latency);
+ if (pci_latency < new_latency) {
+ printk(KERN_INFO "%s: Overriding PCI latency"
+ " timer (CFLT) setting of %d, new value is %d.\n",
+ print_name, pci_latency, new_latency);
+ pci_write_config_byte(pdev, PCI_LATENCY_TIMER, new_latency);
+ }
+ }
+ }
+
+ spin_lock_init(&vp->lock);
+ spin_lock_init(&vp->mdio_lock);
+ vp->pdev = pdev;
+
+ /* Makes sure rings are at least 16 byte aligned. */
+ vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
+ + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+ &vp->rx_ring_dma);
+ retval = -ENOMEM;
+ if (vp->rx_ring == 0)
+ goto free_region;
+
+ vp->tx_ring = (struct boom_tx_desc *)(vp->rx_ring + RX_RING_SIZE);
+ vp->tx_ring_dma = vp->rx_ring_dma + sizeof(struct boom_rx_desc) * RX_RING_SIZE;
+
+ /* if we are a PCI driver, we store info in pdev->driver_data
+ * instead of a module list */
+ if (pdev)
+ pci_set_drvdata(pdev, dev);
+
+ vp->media_override = 7;
+ if (option >= 0) {
+ vp->media_override = ((option & 7) == 2) ? 0 : option & 15;
+ if (vp->media_override != 7)
+ vp->medialock = 1;
+ vp->full_duplex = (option & 0x200) ? 1 : 0;
+ vp->bus_master = (option & 16) ? 1 : 0;
+ }
+
+ if (card_idx < MAX_UNITS) {
+ if (full_duplex[card_idx] > 0)
+ vp->full_duplex = 1;
+ if (flow_ctrl[card_idx] > 0)
+ vp->flow_ctrl = 1;
+ if (enable_wol[card_idx] > 0)
+ vp->enable_wol = 1;
+ }
+
+ vp->force_fd = vp->full_duplex;
+ vp->options = option;
+ /* Read the station address from the EEPROM. */
+ EL3WINDOW(0);
+ {
+ int base;
+
+ if (vci->drv_flags & EEPROM_8BIT)
+ base = 0x230;
+ else if (vci->drv_flags & EEPROM_OFFSET)
+ base = EEPROM_Read + 0x30;
+ else
+ base = EEPROM_Read;
+
+ for (i = 0; i < 0x40; i++) {
+ int timer;
+ outw(base + i, ioaddr + Wn0EepromCmd);
+ /* Pause for at least 162 us for the read to take place. */
+ for (timer = 10; timer >= 0; timer--) {
+ udelay(162);
+ if ((inw(ioaddr + Wn0EepromCmd) & 0x8000) == 0)
+ break;
+ }
+ eeprom[i] = inw(ioaddr + Wn0EepromData);
+ }
+ }
+ for (i = 0; i < 0x18; i++)
+ checksum ^= eeprom[i];
+ checksum = (checksum ^ (checksum >> 8)) & 0xff;
+ if (checksum != 0x00) { /* Grrr, needless incompatible change, 3Com. */
+ while (i < 0x21)
+ checksum ^= eeprom[i++];
+ checksum = (checksum ^ (checksum >> 8)) & 0xff;
+ }
+ if ((checksum != 0x00) && !(vci->drv_flags & IS_TORNADO))
+ printk(" ***INVALID CHECKSUM %4.4x*** ", checksum);
+ for (i = 0; i < 3; i++)
+ ((u16 *)dev->dev_addr)[i] = htons(eeprom[i + 10]);
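+ /*
+ * EEPROM words 10-12 hold the station address in network (big-endian)
+ * byte order, hence the htons() above.
+ */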
+ if (print_info) {
+ for (i = 0; i < 6; i++)
+ printk("%c%2.2x", i ? ':' : ' ', dev->dev_addr[i]);
+ }
+ EL3WINDOW(2);
+ for (i = 0; i < 6; i++)
+ outb(dev->dev_addr[i], ioaddr + i);
+
+#ifdef __sparc__
+ if (print_info)
+ printk(", IRQ %s\n", __irq_itoa(dev->irq));
+#else
+ if (print_info)
+ printk(", IRQ %d\n", dev->irq);
+ /* Tell them about an invalid IRQ. */
+ if (dev->irq <= 0 || dev->irq >= NR_IRQS)
+ printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n",
+ dev->irq);
+#endif
+
+ EL3WINDOW(4);
+ step = (inb(ioaddr + Wn4_NetDiag) & 0x1e) >> 1;
+ if (print_info) {
+ printk(KERN_INFO " product code %02x%02x rev %02x.%d date %02d-"
+ "%02d-%02d\n", eeprom[6]&0xff, eeprom[6]>>8, eeprom[0x14],
+ step, (eeprom[4]>>5) & 15, eeprom[4] & 31, eeprom[4]>>9);
+ }
+
+
+ if (pdev && vci->drv_flags & HAS_CB_FNS) {
+ unsigned long fn_st_addr; /* Cardbus function status space */
+ unsigned short n;
+
+ fn_st_addr = pci_resource_start (pdev, 2);
+ if (fn_st_addr) {
+ vp->cb_fn_base = ioremap(fn_st_addr, 128);
+ retval = -ENOMEM;
+ if (!vp->cb_fn_base)
+ goto free_ring;
+ }
+ if (print_info) {
+ printk(KERN_INFO "%s: CardBus functions mapped %8.8lx->%p\n",
+ print_name, fn_st_addr, vp->cb_fn_base);
+ }
+ EL3WINDOW(2);
+
+ n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010;
+ if (vp->drv_flags & INVERT_LED_PWR)
+ n |= 0x10;
+ if (vp->drv_flags & INVERT_MII_PWR)
+ n |= 0x4000;
+ outw(n, ioaddr + Wn2_ResetOptions);
+ }
+
+ /* Extract our information from the EEPROM data. */
+ vp->info1 = eeprom[13];
+ vp->info2 = eeprom[15];
+ vp->capabilities = eeprom[16];
+
+ if (vp->info1 & 0x8000) {
+ vp->full_duplex = 1;
+ if (print_info)
+ printk(KERN_INFO "Full duplex capable\n");
+ }
+
+ {
+ static const char * ram_split[] = {"5:3", "3:1", "1:1", "3:5"};
+ unsigned int config;
+ EL3WINDOW(3);
+ vp->available_media = inw(ioaddr + Wn3_Options);
+ if ((vp->available_media & 0xff) == 0) /* Broken 3c916 */
+ vp->available_media = 0x40;
+ config = inl(ioaddr + Wn3_Config);
+ if (print_info) {
+ printk(KERN_DEBUG " Internal config register is %4.4x, "
+ "transceivers %#x.\n", config, inw(ioaddr + Wn3_Options));
+ printk(KERN_INFO " %dK %s-wide RAM %s Rx:Tx split, %s%s interface.\n",
+ 8 << RAM_SIZE(config),
+ RAM_WIDTH(config) ? "word" : "byte",
+ ram_split[RAM_SPLIT(config)],
+ AUTOSELECT(config) ? "autoselect/" : "",
+ XCVR(config) > XCVR_ExtMII ? "<invalid transceiver>" :
+ media_tbl[XCVR(config)].name);
+ }
+ vp->default_media = XCVR(config);
+ if (vp->default_media == XCVR_NWAY)
+ vp->has_nway = 1;
+ vp->autoselect = AUTOSELECT(config);
+ }
+
+ if (vp->media_override != 7) {
+ printk(KERN_INFO "%s: Media override to transceiver type %d (%s).\n",
+ print_name, vp->media_override,
+ media_tbl[vp->media_override].name);
+ dev->if_port = vp->media_override;
+ } else
+ dev->if_port = vp->default_media;
+
+ if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) {
+ int phy, phy_idx = 0;
+ EL3WINDOW(4);
+ mii_preamble_required++;
+ mii_preamble_required++;
+ mdio_read(dev, 24, 1);
+ for (phy = 0; phy < 32 && phy_idx < 1; phy++) {
+ int mii_status, phyx;
+
+ /*
+ * For the 3c905CX we look at index 24 first, because it bogusly
+ * reports an external PHY at all indices
+ */
+ if (phy == 0)
+ phyx = 24;
+ else if (phy <= 24)
+ phyx = phy - 1;
+ else
+ phyx = phy;
+ mii_status = mdio_read(dev, phyx, 1);
+ if (mii_status && mii_status != 0xffff) {
+ vp->phys[phy_idx++] = phyx;
+ if (print_info) {
+ printk(KERN_INFO " MII transceiver found at address %d,"
+ " status %4x.\n", phyx, mii_status);
+ }
+ if ((mii_status & 0x0040) == 0)
+ mii_preamble_required++;
+ }
+ }
+ mii_preamble_required--;
+ if (phy_idx == 0) {
+ printk(KERN_WARNING" ***WARNING*** No MII transceivers found!\n");
+ vp->phys[0] = 24;
+ } else {
+ vp->advertising = mdio_read(dev, vp->phys[0], 4);
+ if (vp->full_duplex) {
+ /* Only advertise the FD media types. */
+ vp->advertising &= ~0x02A0;
+ mdio_write(dev, vp->phys[0], 4, vp->advertising);
+ }
+ }
+ }
+
+ if (vp->capabilities & CapBusMaster) {
+ vp->full_bus_master_tx = 1;
+ if (print_info) {
+ printk(KERN_INFO " Enabling bus-master transmits and %s receives.\n",
+ (vp->info2 & 1) ? "early" : "whole-frame" );
+ }
+ vp->full_bus_master_rx = (vp->info2 & 1) ? 1 : 2;
+ vp->bus_master = 0; /* AKPM: vortex only */
+ }
+
+ /* The 3c59x-specific entries in the device structure. */
+ dev->open = vortex_open;
+ if (vp->full_bus_master_tx) {
+ dev->hard_start_xmit = boomerang_start_xmit;
+ /* Actually, it still should work with iommu. */
+ dev->features |= NETIF_F_SG;
+ if (((hw_checksums[card_idx] == -1) && (vp->drv_flags & HAS_HWCKSM)) ||
+ (hw_checksums[card_idx] == 1)) {
+ dev->features |= NETIF_F_IP_CSUM;
+ }
+ } else {
+ dev->hard_start_xmit = vortex_start_xmit;
+ }
+
+ if (print_info) {
+ printk(KERN_INFO "%s: scatter/gather %sabled. h/w checksums %sabled\n",
+ print_name,
+ (dev->features & NETIF_F_SG) ? "en":"dis",
+ (dev->features & NETIF_F_IP_CSUM) ? "en":"dis");
+ }
+
+ dev->stop = vortex_close;
+ dev->get_stats = vortex_get_stats;
+ dev->do_ioctl = vortex_ioctl;
+ dev->set_multicast_list = set_rx_mode;
+ dev->tx_timeout = vortex_tx_timeout;
+ dev->watchdog_timeo = (watchdog * HZ) / 1000;
+ if (pdev && vp->enable_wol) {
+ vp->pm_state_valid = 1;
+ pci_save_state(vp->pdev, vp->power_state);
+ acpi_set_WOL(dev);
+ }
+ retval = register_netdev(dev);
+ if (retval == 0)
+ return 0;
+
+free_ring:
+ pci_free_consistent(pdev,
+ sizeof(struct boom_rx_desc) * RX_RING_SIZE
+ + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+ vp->rx_ring,
+ vp->rx_ring_dma);
+free_region:
+ if (vp->must_free_region)
+ release_region(ioaddr, vci->io_size);
+ kfree (dev);
+ printk(KERN_ERR PFX "vortex_probe1 fails. Returns %d\n", retval);
+out:
+ return retval;
+}
+
+static void
+issue_and_wait(struct net_device *dev, int cmd)
+{
+ int i;
+
+ outw(cmd, dev->base_addr + EL3_CMD);
+ for (i = 0; i < 2000; i++) {
+ if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress))
+ return;
+ }
+
+ /* OK, that didn't work. Do it the slow way. One second. */
+ for (i = 0; i < 100000; i++) {
+ if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress)) {
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: command 0x%04x took %d usecs\n",
+ dev->name, cmd, i * 10);
+ return;
+ }
+ udelay(10);
+ }
+ printk(KERN_ERR "%s: command 0x%04x did not complete! Status=0x%x\n",
+ dev->name, cmd, inw(dev->base_addr + EL3_STATUS));
+}
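+
+/*
+ * Timing note: the first loop above is a fast spin with no delay, for
+ * commands that complete almost immediately; the fallback polls every
+ * 10 us for 100000 iterations -- roughly the promised one second --
+ * before declaring the command stuck.
+ */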
+
+static void
+vortex_up(struct net_device *dev)
+{
+ long ioaddr = dev->base_addr;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ unsigned int config;
+ int i;
+
+ if (vp->pdev && vp->enable_wol) {
+ pci_set_power_state(vp->pdev, 0); /* Go active */
+ pci_restore_state(vp->pdev, vp->power_state);
+ }
+
+ /* Before initializing select the active media port. */
+ EL3WINDOW(3);
+ config = inl(ioaddr + Wn3_Config);
+
+ if (vp->media_override != 7) {
+ printk(KERN_INFO "%s: Media override to transceiver %d (%s).\n",
+ dev->name, vp->media_override,
+ media_tbl[vp->media_override].name);
+ dev->if_port = vp->media_override;
+ } else if (vp->autoselect) {
+ if (vp->has_nway) {
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: using NWAY device table, not %d\n",
+ dev->name, dev->if_port);
+ dev->if_port = XCVR_NWAY;
+ } else {
+ /* Find first available media type, starting with 100baseTx. */
+ dev->if_port = XCVR_100baseTx;
+ while (! (vp->available_media & media_tbl[dev->if_port].mask))
+ dev->if_port = media_tbl[dev->if_port].next;
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: first available media type: %s\n",
+ dev->name, media_tbl[dev->if_port].name);
+ }
+ } else {
+ dev->if_port = vp->default_media;
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: using default media %s\n",
+ dev->name, media_tbl[dev->if_port].name);
+ }
+
+ init_timer(&vp->timer);
+ vp->timer.expires = RUN_AT(media_tbl[dev->if_port].wait);
+ vp->timer.data = (unsigned long)dev;
+ vp->timer.function = vortex_timer; /* timer handler */
+ add_timer(&vp->timer);
+
+ init_timer(&vp->rx_oom_timer);
+ vp->rx_oom_timer.data = (unsigned long)dev;
+ vp->rx_oom_timer.function = rx_oom_timer;
+
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Initial media type %s.\n",
+ dev->name, media_tbl[dev->if_port].name);
+
+ vp->full_duplex = vp->force_fd;
+ config = BFINS(config, dev->if_port, 20, 4);
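+ /*
+ * Sketch, assuming BFINS() is the usual bit-field-insert helper defined
+ * elsewhere in this file: the line above behaves roughly like
+ *
+ *   config = (config & ~(0xf << 20)) | (dev->if_port << 20);
+ *
+ * i.e. it replaces the 4-bit transceiver-select field at bit 20 of
+ * InternalConfig.
+ */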
+ if (vortex_debug > 6)
+ printk(KERN_DEBUG "vortex_up(): writing 0x%x to InternalConfig\n", config);
+ outl(config, ioaddr + Wn3_Config);
+
+ if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) {
+ int mii_reg1, mii_reg5;
+ EL3WINDOW(4);
+ /* Read BMSR (reg1) only to clear old status. */
+ mii_reg1 = mdio_read(dev, vp->phys[0], 1);
+ mii_reg5 = mdio_read(dev, vp->phys[0], 5);
+ if (mii_reg5 == 0xffff || mii_reg5 == 0x0000)
+ ; /* No MII device or no link partner report */
+ else if ((mii_reg5 & 0x0100) != 0 /* 100baseTx-FD */
+ || (mii_reg5 & 0x00C0) == 0x0040) /* 10T-FD, but not 100-HD */
+ vp->full_duplex = 1;
+ vp->partner_flow_ctrl = ((mii_reg5 & 0x0400) != 0);
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: MII #%d status %4.4x, link partner capability %4.4x,"
+ " info1 %04x, setting %s-duplex.\n",
+ dev->name, vp->phys[0],
+ mii_reg1, mii_reg5,
+ vp->info1, ((vp->info1 & 0x8000) || vp->full_duplex) ? "full" : "half");
+ EL3WINDOW(3);
+ }
+
+ /* Set the full-duplex bit. */
+ outw( ((vp->info1 & 0x8000) || vp->full_duplex ? 0x20 : 0) |
+ (dev->mtu > 1500 ? 0x40 : 0) |
+ ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0),
+ ioaddr + Wn3_MAC_Ctrl);
+
+ if (vortex_debug > 1) {
+ printk(KERN_DEBUG "%s: vortex_up() InternalConfig %8.8x.\n",
+ dev->name, config);
+ }
+
+ issue_and_wait(dev, TxReset);
+ /*
+ * Don't reset the PHY - that upsets autonegotiation during DHCP operations.
+ */
+ issue_and_wait(dev, RxReset|0x04);
+
+ outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD);
+
+ if (vortex_debug > 1) {
+ EL3WINDOW(4);
+ printk(KERN_DEBUG "%s: vortex_up() irq %d media status %4.4x.\n",
+ dev->name, dev->irq, inw(ioaddr + Wn4_Media));
+ }
+
+ /* Set the station address and mask in window 2 each time opened. */
+ EL3WINDOW(2);
+ for (i = 0; i < 6; i++)
+ outb(dev->dev_addr[i], ioaddr + i);
+ for (; i < 12; i+=2)
+ outw(0, ioaddr + i);
+
+ if (vp->cb_fn_base) {
+ unsigned short n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010;
+ if (vp->drv_flags & INVERT_LED_PWR)
+ n |= 0x10;
+ if (vp->drv_flags & INVERT_MII_PWR)
+ n |= 0x4000;
+ outw(n, ioaddr + Wn2_ResetOptions);
+ }
+
+ if (dev->if_port == XCVR_10base2)
+ /* Start the thinnet transceiver. We should really wait 50ms...*/
+ outw(StartCoax, ioaddr + EL3_CMD);
+ if (dev->if_port != XCVR_NWAY) {
+ EL3WINDOW(4);
+ outw((inw(ioaddr + Wn4_Media) & ~(Media_10TP|Media_SQE)) |
+ media_tbl[dev->if_port].media_bits, ioaddr + Wn4_Media);
+ }
+
+ /* Switch to the stats window, and clear all stats by reading. */
+ outw(StatsDisable, ioaddr + EL3_CMD);
+ EL3WINDOW(6);
+ for (i = 0; i < 10; i++)
+ inb(ioaddr + i);
+ inw(ioaddr + 10);
+ inw(ioaddr + 12);
+ /* New: On the Vortex we must also clear the BadSSD counter. */
+ EL3WINDOW(4);
+ inb(ioaddr + 12);
+ /* ..and on the Boomerang we enable the extra statistics bits. */
+ outw(0x0040, ioaddr + Wn4_NetDiag);
+
+ /* Switch to register set 7 for normal use. */
+ EL3WINDOW(7);
+
+ if (vp->full_bus_master_rx) { /* Boomerang bus master. */
+ vp->cur_rx = vp->dirty_rx = 0;
+ /* Initialize the RxEarly register as recommended. */
+ outw(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD);
+ outl(0x0020, ioaddr + PktStatus);
+ outl(vp->rx_ring_dma, ioaddr + UpListPtr);
+ }
+ if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */
+ vp->cur_tx = vp->dirty_tx = 0;
+ if (vp->drv_flags & IS_BOOMERANG)
+ outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold); /* Room for a packet. */
+ /* Clear the Rx, Tx rings. */
+ for (i = 0; i < RX_RING_SIZE; i++) /* AKPM: this is done in vortex_open, too */
+ vp->rx_ring[i].status = 0;
+ for (i = 0; i < TX_RING_SIZE; i++)
+ vp->tx_skbuff[i] = 0;
+ outl(0, ioaddr + DownListPtr);
+ }
+ /* Set receiver mode: presumably accept broadcast and physical addr only. */
+ set_rx_mode(dev);
+ outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */
+
+// issue_and_wait(dev, SetTxStart|0x07ff);
+ outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */
+ outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */
+ /* Allow status bits to be seen. */
+ vp->status_enable = SetStatusEnb | HostError|IntReq|StatsFull|TxComplete|
+ (vp->full_bus_master_tx ? DownComplete : TxAvailable) |
+ (vp->full_bus_master_rx ? UpComplete : RxComplete) |
+ (vp->bus_master ? DMADone : 0);
+ vp->intr_enable = SetIntrEnb | IntLatch | TxAvailable |
+ (vp->full_bus_master_rx ? 0 : RxComplete) |
+ StatsFull | HostError | TxComplete | IntReq
+ | (vp->bus_master ? DMADone : 0) | UpComplete | DownComplete;
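+ /*
+ * Two separate masks (an interpretation, from the command names):
+ * status_enable (SetStatusEnb) selects which events may appear in the
+ * status register at all, while intr_enable (SetIntrEnb) selects which
+ * of those actually assert the interrupt line.
+ */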
+ outw(vp->status_enable, ioaddr + EL3_CMD);
+ /* Ack all pending events, and set active indicator mask. */
+ outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq,
+ ioaddr + EL3_CMD);
+ outw(vp->intr_enable, ioaddr + EL3_CMD);
+ if (vp->cb_fn_base) /* The PCMCIA people are idiots. */
+ writel(0x8000, vp->cb_fn_base + 4);
+ netif_start_queue (dev);
+}
+
+static int
+vortex_open(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ int i;
+ int retval;
+
+ /* Use the now-standard shared IRQ implementation. */
+ if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ?
+ &boomerang_interrupt : &vortex_interrupt, SA_SHIRQ, dev->name, dev))) {
+ printk(KERN_ERR "%s: Could not reserve IRQ %d\n", dev->name, dev->irq);
+ goto out;
+ }
+
+ if (vp->full_bus_master_rx) { /* Boomerang bus master. */
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG "%s: Filling in the Rx ring.\n", dev->name);
+ for (i = 0; i < RX_RING_SIZE; i++) {
+ struct sk_buff *skb;
+ vp->rx_ring[i].next = cpu_to_le32(vp->rx_ring_dma + sizeof(struct boom_rx_desc) * (i+1));
+ vp->rx_ring[i].status = 0; /* Clear complete bit. */
+ vp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ | LAST_FRAG);
+ skb = dev_alloc_skb(PKT_BUF_SZ);
+ vp->rx_skbuff[i] = skb;
+ if (skb == NULL)
+ break; /* Bad news! */
+ skb->dev = dev; /* Mark as being used by this device. */
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+ vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
+ }
+ if (i != RX_RING_SIZE) {
+ int j;
+ printk(KERN_EMERG "%s: no memory for rx ring\n", dev->name);
+ for (j = 0; j < i; j++) {
+ if (vp->rx_skbuff[j]) {
+ dev_kfree_skb(vp->rx_skbuff[j]);
+ vp->rx_skbuff[j] = 0;
+ }
+ }
+ retval = -ENOMEM;
+ goto out_free_irq;
+ }
+ /* Wrap the ring. */
+ vp->rx_ring[i-1].next = cpu_to_le32(vp->rx_ring_dma);
+ }
+
+ vortex_up(dev);
+ return 0;
+
+out_free_irq:
+ free_irq(dev->irq, dev);
+out:
+ if (vortex_debug > 1)
+ printk(KERN_ERR "%s: vortex_open() fails: returning %d\n", dev->name, retval);
+ return retval;
+}
+
+static void
+vortex_timer(unsigned long data)
+{
+ struct net_device *dev = (struct net_device *)data;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int next_tick = 60*HZ;
+ int ok = 0;
+ int media_status, mii_status, old_window;
+
+ if (vortex_debug > 2) {
+ printk(KERN_DEBUG "%s: Media selection timer tick happened, %s.\n",
+ dev->name, media_tbl[dev->if_port].name);
+ printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo);
+ }
+
+ if (vp->medialock)
+ goto leave_media_alone;
+ disable_irq(dev->irq);
+ old_window = inw(ioaddr + EL3_CMD) >> 13;
+ EL3WINDOW(4);
+ media_status = inw(ioaddr + Wn4_Media);
+ switch (dev->if_port) {
+ case XCVR_10baseT: case XCVR_100baseTx: case XCVR_100baseFx:
+ if (media_status & Media_LnkBeat) {
+ ok = 1;
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media %s has link beat, %x.\n",
+ dev->name, media_tbl[dev->if_port].name, media_status);
+ } else if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media %s has no link beat, %x.\n",
+ dev->name, media_tbl[dev->if_port].name, media_status);
+ break;
+ case XCVR_MII: case XCVR_NWAY:
+ {
+ mii_status = mdio_read(dev, vp->phys[0], 1);
+ ok = 1;
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG "%s: MII transceiver has status %4.4x.\n",
+ dev->name, mii_status);
+ if (mii_status & 0x0004) {
+ int mii_reg5 = mdio_read(dev, vp->phys[0], 5);
+ if (! vp->force_fd && mii_reg5 != 0xffff) {
+ int duplex = (mii_reg5&0x0100) ||
+ (mii_reg5 & 0x01C0) == 0x0040;
+ if (vp->full_duplex != duplex) {
+ vp->full_duplex = duplex;
+ printk(KERN_INFO "%s: Setting %s-duplex based on MII "
+ "#%d link partner capability of %4.4x.\n",
+ dev->name, vp->full_duplex ? "full" : "half",
+ vp->phys[0], mii_reg5);
+ /* Set the full-duplex bit. */
+ EL3WINDOW(3);
+ outw( (vp->full_duplex ? 0x20 : 0) |
+ (dev->mtu > 1500 ? 0x40 : 0) |
+ ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0),
+ ioaddr + Wn3_MAC_Ctrl);
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "Setting duplex in Wn3_MAC_Ctrl\n");
+ /* AKPM: bug: should reset Tx and Rx after setting Duplex. Page 180 */
+ }
+ }
+ }
+ }
+ break;
+ default: /* Other media types handled by Tx timeouts. */
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media %s has no indication, %x.\n",
+ dev->name, media_tbl[dev->if_port].name, media_status);
+ ok = 1;
+ }
+ if ( ! ok) {
+ unsigned int config;
+
+ do {
+ dev->if_port = media_tbl[dev->if_port].next;
+ } while ( ! (vp->available_media & media_tbl[dev->if_port].mask));
+ if (dev->if_port == XCVR_Default) { /* Go back to default. */
+ dev->if_port = vp->default_media;
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media selection failing, using default "
+ "%s port.\n",
+ dev->name, media_tbl[dev->if_port].name);
+ } else {
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media selection failed, now trying "
+ "%s port.\n",
+ dev->name, media_tbl[dev->if_port].name);
+ next_tick = media_tbl[dev->if_port].wait;
+ }
+ outw((media_status & ~(Media_10TP|Media_SQE)) |
+ media_tbl[dev->if_port].media_bits, ioaddr + Wn4_Media);
+
+ EL3WINDOW(3);
+ config = inl(ioaddr + Wn3_Config);
+ config = BFINS(config, dev->if_port, 20, 4);
+ outl(config, ioaddr + Wn3_Config);
+
+ outw(dev->if_port == XCVR_10base2 ? StartCoax : StopCoax,
+ ioaddr + EL3_CMD);
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "wrote 0x%08x to Wn3_Config\n", config);
+ /* AKPM: FIXME: Should reset Rx & Tx here. P60 of 3c90xc.pdf */
+ }
+ EL3WINDOW(old_window);
+ enable_irq(dev->irq);
+
+leave_media_alone:
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG "%s: Media selection timer finished, %s.\n",
+ dev->name, media_tbl[dev->if_port].name);
+
+ mod_timer(&vp->timer, RUN_AT(next_tick));
+ if (vp->deferred)
+ outw(FakeIntr, ioaddr + EL3_CMD);
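+ /*
+ * vp->deferred is set by the ISR's "Too much work" path, which masks
+ * the offending sources; issuing FakeIntr here re-enters the ISR so
+ * the deferred events are folded back into 'status' and handled.
+ */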
+ return;
+}
+
+static void vortex_tx_timeout(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ printk(KERN_ERR "%s: transmit timed out, tx_status %2.2x status %4.4x.\n",
+ dev->name, inb(ioaddr + TxStatus),
+ inw(ioaddr + EL3_STATUS));
+ EL3WINDOW(4);
+ printk(KERN_ERR " diagnostics: net %04x media %04x dma %8.8x.\n",
+ inw(ioaddr + Wn4_NetDiag), inw(ioaddr + Wn4_Media),
+ inl(ioaddr + PktStatus));
+ /* Slight code bloat to be user friendly. */
+ if ((inb(ioaddr + TxStatus) & 0x88) == 0x88)
+ printk(KERN_ERR "%s: Transmitter encountered 16 collisions --"
+ " network cable problem?\n", dev->name);
+ if (inw(ioaddr + EL3_STATUS) & IntLatch) {
+ printk(KERN_ERR "%s: Interrupt posted but not delivered --"
+ " IRQ blocked by another device?\n", dev->name);
+ /* Bad idea here.. but we might as well handle a few events. */
+ {
+ /*
+ * Block interrupts because vortex_interrupt does a bare spin_lock()
+ */
+ unsigned long flags;
+ local_irq_save(flags);
+ if (vp->full_bus_master_tx)
+ boomerang_interrupt(dev->irq, dev, 0);
+ else
+ vortex_interrupt(dev->irq, dev, 0);
+ local_irq_restore(flags);
+ }
+ }
+
+ if (vortex_debug > 0)
+ dump_tx_ring(dev);
+
+ issue_and_wait(dev, TxReset);
+
+ vp->stats.tx_errors++;
+ if (vp->full_bus_master_tx) {
+ printk(KERN_DEBUG "%s: Resetting the Tx ring pointer.\n", dev->name);
+ if (vp->cur_tx - vp->dirty_tx > 0 && inl(ioaddr + DownListPtr) == 0)
+ outl(vp->tx_ring_dma + (vp->dirty_tx % TX_RING_SIZE) * sizeof(struct boom_tx_desc),
+ ioaddr + DownListPtr);
+ if (vp->cur_tx - vp->dirty_tx < TX_RING_SIZE)
+ netif_wake_queue (dev);
+ if (vp->drv_flags & IS_BOOMERANG)
+ outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold);
+ outw(DownUnstall, ioaddr + EL3_CMD);
+ } else {
+ vp->stats.tx_dropped++;
+ netif_wake_queue(dev);
+ }
+
+ /* Issue Tx Enable */
+ outw(TxEnable, ioaddr + EL3_CMD);
+ dev->trans_start = jiffies;
+
+ /* Switch to register set 7 for normal use. */
+ EL3WINDOW(7);
+}
+
+/*
+ * Handle uncommon interrupt sources. This is a separate routine to minimize
+ * the cache impact.
+ */
+static void
+vortex_error(struct net_device *dev, int status)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int do_tx_reset = 0, reset_mask = 0;
+ unsigned char tx_status = 0;
+
+ if (vortex_debug > 2) {
+ printk(KERN_ERR "%s: vortex_error(), status=0x%x\n", dev->name, status);
+ }
+
+ if (status & TxComplete) { /* Really "TxError" for us. */
+ tx_status = inb(ioaddr + TxStatus);
+ /* Presumably a tx-timeout. We must merely re-enable. */
+ if (vortex_debug > 2
+ || (tx_status != 0x88 && vortex_debug > 0)) {
+ printk(KERN_ERR "%s: Transmit error, Tx status register %2.2x.\n",
+ dev->name, tx_status);
+ if (tx_status == 0x82) {
+ printk(KERN_ERR "Probably a duplex mismatch. See "
+ "Documentation/networking/vortex.txt\n");
+ }
+ dump_tx_ring(dev);
+ }
+ if (tx_status & 0x14) vp->stats.tx_fifo_errors++;
+ if (tx_status & 0x38) vp->stats.tx_aborted_errors++;
+ outb(0, ioaddr + TxStatus);
+ if (tx_status & 0x30) { /* txJabber or txUnderrun */
+ do_tx_reset = 1;
+ } else if ((tx_status & 0x08) && (vp->drv_flags & MAX_COLLISION_RESET)) { /* maxCollisions */
+ do_tx_reset = 1;
+ reset_mask = 0x0108; /* Reset interface logic, but not download logic */
+ } else { /* Merely re-enable the transmitter. */
+ outw(TxEnable, ioaddr + EL3_CMD);
+ }
+ }
+
+ if (status & RxEarly) { /* Rx early is unused. */
+ vortex_rx(dev);
+ outw(AckIntr | RxEarly, ioaddr + EL3_CMD);
+ }
+ if (status & StatsFull) { /* Empty statistics. */
+ static int DoneDidThat;
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: Updating stats.\n", dev->name);
+ update_stats(ioaddr, dev);
+ /* HACK: Disable statistics as an interrupt source. */
+ /* This occurs when we have the wrong media type! */
+ if (DoneDidThat == 0 &&
+ inw(ioaddr + EL3_STATUS) & StatsFull) {
+ printk(KERN_WARNING "%s: Updating statistics failed, disabling "
+ "stats as an interrupt source.\n", dev->name);
+ EL3WINDOW(5);
+ outw(SetIntrEnb | (inw(ioaddr + 10) & ~StatsFull), ioaddr + EL3_CMD);
+ vp->intr_enable &= ~StatsFull;
+ EL3WINDOW(7);
+ DoneDidThat++;
+ }
+ }
+ if (status & IntReq) { /* Restore all interrupt sources. */
+ outw(vp->status_enable, ioaddr + EL3_CMD);
+ outw(vp->intr_enable, ioaddr + EL3_CMD);
+ }
+ if (status & HostError) {
+ u16 fifo_diag;
+ EL3WINDOW(4);
+ fifo_diag = inw(ioaddr + Wn4_FIFODiag);
+ printk(KERN_ERR "%s: Host error, FIFO diagnostic register %4.4x.\n",
+ dev->name, fifo_diag);
+ /* Adapter failure requires Tx/Rx reset and reinit. */
+ if (vp->full_bus_master_tx) {
+ int bus_status = inl(ioaddr + PktStatus);
+ /* 0x80000000 PCI master abort. */
+ /* 0x40000000 PCI target abort. */
+ if (vortex_debug)
+ printk(KERN_ERR "%s: PCI bus error, bus status %8.8x\n", dev->name, bus_status);
+
+ /* In this case, blow the card away */
+ vortex_down(dev);
+ issue_and_wait(dev, TotalReset | 0xff);
+ vortex_up(dev); /* AKPM: bug. vortex_up() assumes that the rx ring is full. It may not be. */
+ } else if (fifo_diag & 0x0400)
+ do_tx_reset = 1;
+ if (fifo_diag & 0x3000) {
+ /* Reset Rx fifo and upload logic */
+ issue_and_wait(dev, RxReset|0x07);
+ /* Set the Rx filter to the current state. */
+ set_rx_mode(dev);
+ outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */
+ outw(AckIntr | HostError, ioaddr + EL3_CMD);
+ }
+ }
+
+ if (do_tx_reset) {
+ issue_and_wait(dev, TxReset|reset_mask);
+ outw(TxEnable, ioaddr + EL3_CMD);
+ if (!vp->full_bus_master_tx)
+ netif_wake_queue(dev);
+ }
+}
+
+static int
+vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ /* Put out the doubleword header... */
+ outl(skb->len, ioaddr + TX_FIFO);
+ if (vp->bus_master) {
+ /* Set the bus-master controller to transfer the packet. */
+ int len = (skb->len + 3) & ~3;
+ outl( vp->tx_skb_dma = pci_map_single(vp->pdev, skb->data, len, PCI_DMA_TODEVICE),
+ ioaddr + Wn7_MasterAddr);
+ outw(len, ioaddr + Wn7_MasterLen);
+ vp->tx_skb = skb;
+ outw(StartDMADown, ioaddr + EL3_CMD);
+ /* netif_wake_queue() will be called at the DMADone interrupt. */
+ } else {
+ /* ... and the packet rounded to a doubleword. */
+ outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2);
+ dev_kfree_skb (skb);
+ if (inw(ioaddr + TxFree) > 1536) {
+ netif_start_queue (dev); /* AKPM: redundant? */
+ } else {
+ /* Interrupt us when the FIFO has room for max-sized packet. */
+ netif_stop_queue(dev);
+ outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD);
+ }
+ }
+
+ dev->trans_start = jiffies;
+
+ /* Clear the Tx status stack. */
+ {
+ int tx_status;
+ int i = 32;
+
+ while (--i > 0 && (tx_status = inb(ioaddr + TxStatus)) > 0) {
+ if (tx_status & 0x3C) { /* A Tx-disabling error occurred. */
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG "%s: Tx error, status %2.2x.\n",
+ dev->name, tx_status);
+ if (tx_status & 0x04) vp->stats.tx_fifo_errors++;
+ if (tx_status & 0x38) vp->stats.tx_aborted_errors++;
+ if (tx_status & 0x30) {
+ issue_and_wait(dev, TxReset);
+ }
+ outw(TxEnable, ioaddr + EL3_CMD);
+ }
+ outb(0x00, ioaddr + TxStatus); /* Pop the status stack. */
+ }
+ }
+ return 0;
+}
+
+static int
+boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ /* Calculate the next Tx descriptor entry. */
+ int entry = vp->cur_tx % TX_RING_SIZE;
+ struct boom_tx_desc *prev_entry = &vp->tx_ring[(vp->cur_tx-1) % TX_RING_SIZE];
+ unsigned long flags;
+
+ if (vortex_debug > 6) {
+ printk(KERN_DEBUG "boomerang_start_xmit()\n");
+ if (vortex_debug > 3)
+ printk(KERN_DEBUG "%s: Trying to send a packet, Tx index %d.\n",
+ dev->name, vp->cur_tx);
+ }
+
+ if (vp->cur_tx - vp->dirty_tx >= TX_RING_SIZE) {
+ if (vortex_debug > 0)
+ printk(KERN_WARNING "%s: BUG! Tx Ring full, refusing to send buffer.\n",
+ dev->name);
+ netif_stop_queue(dev);
+ return 1;
+ }
+
+ vp->tx_skbuff[entry] = skb;
+
+ vp->tx_ring[entry].next = 0;
+#if DO_ZEROCOPY
+ if (skb->ip_summed != CHECKSUM_HW)
+ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
+ else
+ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum);
+
+ if (!skb_shinfo(skb)->nr_frags) {
+ vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data,
+ skb->len, PCI_DMA_TODEVICE));
+ vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len | LAST_FRAG);
+ } else {
+ int i;
+
+ vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data,
+ skb->len-skb->data_len, PCI_DMA_TODEVICE));
+ vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len-skb->data_len);
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ vp->tx_ring[entry].frag[i+1].addr =
+ cpu_to_le32(pci_map_single(vp->pdev,
+ (void*)page_address(frag->page) + frag->page_offset,
+ frag->size, PCI_DMA_TODEVICE));
+
+ if (i == skb_shinfo(skb)->nr_frags-1)
+ vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size|LAST_FRAG);
+ else
+ vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size);
+ }
+ }
+#else
+ vp->tx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data, skb->len, PCI_DMA_TODEVICE));
+ vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG);
+ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
+#endif
+
+ spin_lock_irqsave(&vp->lock, flags);
+ /* Wait for the stall to complete. */
+ issue_and_wait(dev, DownStall);
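+ /*
+ * The download (Tx DMA) engine must be stalled before touching a list
+ * it may be fetching from; DownUnstall below restarts it once the new
+ * descriptor is linked in.
+ */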
+ prev_entry->next = cpu_to_le32(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc));
+ if (inl(ioaddr + DownListPtr) == 0) {
+ outl(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc), ioaddr + DownListPtr);
+ vp->queued_packet++;
+ }
+
+ vp->cur_tx++;
+ if (vp->cur_tx - vp->dirty_tx > TX_RING_SIZE - 1) {
+ netif_stop_queue (dev);
+ } else { /* Clear previous interrupt enable. */
+#if defined(tx_interrupt_mitigation)
+ /* Dubious. If the "faster" cyclone-only test in
+ * boomerang_interrupt were selected by its ifdef, this
+ * would corrupt DN_COMPLETE. No?
+ */
+ prev_entry->status &= cpu_to_le32(~TxIntrUploaded);
+#endif
+ }
+ outw(DownUnstall, ioaddr + EL3_CMD);
+ spin_unlock_irqrestore(&vp->lock, flags);
+ dev->trans_start = jiffies;
+ return 0;
+}
+
+/* The interrupt handler does all of the Rx thread work and cleans up
+ after the Tx thread. */
+
+/*
+ * This is the ISR for the vortex series chips.
+ * full_bus_master_tx == 0 && full_bus_master_rx == 0
+ */
+
+static void vortex_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct net_device *dev = dev_id;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr;
+ int status;
+ int work_done = max_interrupt_work;
+
+ ioaddr = dev->base_addr;
+ spin_lock(&vp->lock);
+
+ status = inw(ioaddr + EL3_STATUS);
+
+ if (vortex_debug > 6)
+ printk("vortex_interrupt(). status=0x%4x\n", status);
+
+ if ((status & IntLatch) == 0)
+ goto handler_exit; /* No interrupt: shared IRQs cause this */
+
+ if (status & IntReq) {
+ status |= vp->deferred;
+ vp->deferred = 0;
+ }
+
+ if (status == 0xffff) /* h/w no longer present (hotplug)? */
+ goto handler_exit;
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n",
+ dev->name, status, inb(ioaddr + Timer));
+
+ do {
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n",
+ dev->name, status);
+ if (status & RxComplete)
+ vortex_rx(dev);
+
+ if (status & TxAvailable) {
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG " TX room bit was handled.\n");
+ /* There's room in the FIFO for a full-sized packet. */
+ outw(AckIntr | TxAvailable, ioaddr + EL3_CMD);
+ netif_wake_queue (dev);
+ }
+
+ if (status & DMADone) {
+ if (inw(ioaddr + Wn7_MasterStatus) & 0x1000) {
+ outw(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */
+ pci_unmap_single(vp->pdev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE);
+ dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
+ if (inw(ioaddr + TxFree) > 1536) {
+ /*
+ * AKPM: FIXME: I don't think we need this. If the queue was stopped due to
+ * insufficient FIFO room, the TxAvailable test will succeed and call
+ * netif_wake_queue()
+ */
+ netif_wake_queue(dev);
+ } else { /* Interrupt when FIFO has room for max-sized packet. */
+ outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD);
+ netif_stop_queue(dev);
+ }
+ }
+ }
+ /* Check for all uncommon interrupts at once. */
+ if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) {
+ if (status == 0xffff)
+ break;
+ vortex_error(dev, status);
+ }
+
+ if (--work_done < 0) {
+ printk(KERN_WARNING "%s: Too much work in interrupt, status "
+ "%4.4x.\n", dev->name, status);
+ /* Disable all pending interrupts. */
+ do {
+ vp->deferred |= status;
+ outw(SetStatusEnb | (~vp->deferred & vp->status_enable),
+ ioaddr + EL3_CMD);
+ outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD);
+ } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch);
+ /* The timer will reenable interrupts. */
+ mod_timer(&vp->timer, jiffies + 1*HZ);
+ break;
+ }
+ /* Acknowledge the IRQ. */
+ outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD);
+ } while ((status = inw(ioaddr + EL3_STATUS)) & (IntLatch | RxComplete));
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n",
+ dev->name, status);
+handler_exit:
+ spin_unlock(&vp->lock);
+}
+
+/*
+ * This is the ISR for the boomerang series chips.
+ * full_bus_master_tx == 1 && full_bus_master_rx == 1
+ */
+
+static void boomerang_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct net_device *dev = dev_id;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr;
+ int status;
+ int work_done = max_interrupt_work;
+
+ ioaddr = dev->base_addr;
+
+ /*
+ * It seems dopey to put the spinlock this early, but we could race against vortex_tx_timeout
+ * and boomerang_start_xmit
+ */
+ spin_lock(&vp->lock);
+
+ status = inw(ioaddr + EL3_STATUS);
+
+ if (vortex_debug > 6)
+ printk(KERN_DEBUG "boomerang_interrupt. status=0x%4x\n", status);
+
+ if ((status & IntLatch) == 0)
+ goto handler_exit; /* No interrupt: shared IRQs can cause this */
+
+ if (status == 0xffff) { /* h/w no longer present (hotplug)? */
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "boomerang_interrupt(1): status = 0xffff\n");
+ goto handler_exit;
+ }
+
+ if (status & IntReq) {
+ status |= vp->deferred;
+ vp->deferred = 0;
+ }
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n",
+ dev->name, status, inb(ioaddr + Timer));
+ do {
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n",
+ dev->name, status);
+ if (status & UpComplete) {
+ outw(AckIntr | UpComplete, ioaddr + EL3_CMD);
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "boomerang_interrupt->boomerang_rx\n");
+ boomerang_rx(dev);
+ }
+
+ if (status & DownComplete) {
+ unsigned int dirty_tx = vp->dirty_tx;
+
+ outw(AckIntr | DownComplete, ioaddr + EL3_CMD);
+ while (vp->cur_tx - dirty_tx > 0) {
+ int entry = dirty_tx % TX_RING_SIZE;
+#if 1 /* AKPM: the latter is faster, but cyclone-only */
+ if (inl(ioaddr + DownListPtr) ==
+ vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc))
+ break; /* It still hasn't been processed. */
+#else
+ if ((vp->tx_ring[entry].status & DN_COMPLETE) == 0)
+ break; /* It still hasn't been processed. */
+#endif
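+ /*
+ * Two completion tests: comparing the NIC's DownListPtr with this
+ * entry's bus address works on all boomerang-class chips but costs a
+ * PIO read; testing the DN_COMPLETE bit written back into the
+ * descriptor is faster but cyclone-only, per the AKPM note above.
+ */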
+
+ if (vp->tx_skbuff[entry]) {
+ struct sk_buff *skb = vp->tx_skbuff[entry];
+#if DO_ZEROCOPY
+ int i;
+ for (i=0; i<=skb_shinfo(skb)->nr_frags; i++)
+ pci_unmap_single(vp->pdev,
+ le32_to_cpu(vp->tx_ring[entry].frag[i].addr),
+ le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF,
+ PCI_DMA_TODEVICE);
+#else
+ pci_unmap_single(vp->pdev,
+ le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE);
+#endif
+ dev_kfree_skb_irq(skb);
+ vp->tx_skbuff[entry] = 0;
+ } else {
+ printk(KERN_DEBUG "boomerang_interrupt: no skb!\n");
+ }
+ /* vp->stats.tx_packets++; Counted below. */
+ dirty_tx++;
+ }
+ vp->dirty_tx = dirty_tx;
+ if (vp->cur_tx - dirty_tx <= TX_RING_SIZE - 1) {
+ if (vortex_debug > 6)
+ printk(KERN_DEBUG "boomerang_interrupt: wake queue\n");
+ netif_wake_queue (dev);
+ }
+ }
+
+ /* Check for all uncommon interrupts at once. */
+ if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq))
+ vortex_error(dev, status);
+
+ if (--work_done < 0) {
+ printk(KERN_WARNING "%s: Too much work in interrupt, status "
+ "%4.4x.\n", dev->name, status);
+ /* Disable all pending interrupts. */
+ do {
+ vp->deferred |= status;
+ outw(SetStatusEnb | (~vp->deferred & vp->status_enable),
+ ioaddr + EL3_CMD);
+ outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD);
+ } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch);
+ /* The timer will reenable interrupts. */
+ mod_timer(&vp->timer, jiffies + 1*HZ);
+ break;
+ }
+ /* Acknowledge the IRQ. */
+ outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD);
+ if (vp->cb_fn_base) /* The PCMCIA people are idiots. */
+ writel(0x8000, vp->cb_fn_base + 4);
+
+ } while ((status = inw(ioaddr + EL3_STATUS)) & IntLatch);
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n",
+ dev->name, status);
+handler_exit:
+ spin_unlock(&vp->lock);
+}
+
+static int vortex_rx(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int i;
+ short rx_status;
+
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "vortex_rx(): status %4.4x, rx_status %4.4x.\n",
+ inw(ioaddr+EL3_STATUS), inw(ioaddr+RxStatus));
+ while ((rx_status = inw(ioaddr + RxStatus)) > 0) {
+ if (rx_status & 0x4000) { /* Error, update stats. */
+ unsigned char rx_error = inb(ioaddr + RxErrors);
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error);
+ vp->stats.rx_errors++;
+ if (rx_error & 0x01) vp->stats.rx_over_errors++;
+ if (rx_error & 0x02) vp->stats.rx_length_errors++;
+ if (rx_error & 0x04) vp->stats.rx_frame_errors++;
+ if (rx_error & 0x08) vp->stats.rx_crc_errors++;
+ if (rx_error & 0x10) vp->stats.rx_length_errors++;
+ } else {
+ /* The packet length: up to 4.5K! */
+ int pkt_len = rx_status & 0x1fff;
+ struct sk_buff *skb;
+
+ skb = dev_alloc_skb(pkt_len + 5);
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n",
+ pkt_len, rx_status);
+ if (skb != NULL) {
+ skb->dev = dev;
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+ /* 'skb_put()' points to the start of sk_buff data area. */
+ if (vp->bus_master &&
+ ! (inw(ioaddr + Wn7_MasterStatus) & 0x8000)) {
+ dma_addr_t dma = pci_map_single(vp->pdev, skb_put(skb, pkt_len),
+ pkt_len, PCI_DMA_FROMDEVICE);
+ outl(dma, ioaddr + Wn7_MasterAddr);
+ outw((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen);
+ outw(StartDMAUp, ioaddr + EL3_CMD);
+ while (inw(ioaddr + Wn7_MasterStatus) & 0x8000)
+ ;
+ pci_unmap_single(vp->pdev, dma, pkt_len, PCI_DMA_FROMDEVICE);
+ } else {
+ insl(ioaddr + RX_FIFO, skb_put(skb, pkt_len),
+ (pkt_len + 3) >> 2);
+ }
+ outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+ vp->stats.rx_packets++;
+ /* Wait a limited time to go to next packet. */
+ for (i = 200; i >= 0; i--)
+ if ( ! (inw(ioaddr + EL3_STATUS) & CmdInProgress))
+ break;
+ continue;
+ } else if (vortex_debug > 0)
+ printk(KERN_NOTICE "%s: No memory to allocate a sk_buff of "
+ "size %d.\n", dev->name, pkt_len);
+ }
+ vp->stats.rx_dropped++;
+ issue_and_wait(dev, RxDiscard);
+ }
+
+ return 0;
+}
+
+static int
+boomerang_rx(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ int entry = vp->cur_rx % RX_RING_SIZE;
+ long ioaddr = dev->base_addr;
+ int rx_status;
+ int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx;
+
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "boomerang_rx(): status %4.4x\n", inw(ioaddr+EL3_STATUS));
+
+ while ((rx_status = le32_to_cpu(vp->rx_ring[entry].status)) & RxDComplete){
+ if (--rx_work_limit < 0)
+ break;
+ if (rx_status & RxDError) { /* Error, update stats. */
+ unsigned char rx_error = rx_status >> 16;
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error);
+ vp->stats.rx_errors++;
+ if (rx_error & 0x01) vp->stats.rx_over_errors++;
+ if (rx_error & 0x02) vp->stats.rx_length_errors++;
+ if (rx_error & 0x04) vp->stats.rx_frame_errors++;
+ if (rx_error & 0x08) vp->stats.rx_crc_errors++;
+ if (rx_error & 0x10) vp->stats.rx_length_errors++;
+ } else {
+ /* The packet length: up to 4.5K! */
+ int pkt_len = rx_status & 0x1fff;
+ struct sk_buff *skb;
+ dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr);
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n",
+ pkt_len, rx_status);
+
+ /* Check if the packet is long enough to just accept without
+ copying to a properly sized skbuff. */
+ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
+ skb->dev = dev;
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+ pci_dma_sync_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ /* 'skb_put()' points to the start of sk_buff data area. */
+ memcpy(skb_put(skb, pkt_len),
+ vp->rx_skbuff[entry]->tail,
+ pkt_len);
+ vp->rx_copy++;
+ } else {
+ /* Pass up the skbuff already on the Rx ring. */
+ skb = vp->rx_skbuff[entry];
+ vp->rx_skbuff[entry] = NULL;
+ skb_put(skb, pkt_len);
+ pci_unmap_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ vp->rx_nocopy++;
+ }
+ skb->protocol = eth_type_trans(skb, dev);
+ { /* Use hardware checksum info. */
+ int csum_bits = rx_status & 0xee000000;
+ if (csum_bits &&
+ (csum_bits == (IPChksumValid | TCPChksumValid) ||
+ csum_bits == (IPChksumValid | UDPChksumValid))) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ vp->rx_csumhits++;
+ }
+ }
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+ vp->stats.rx_packets++;
+ }
+ entry = (++vp->cur_rx) % RX_RING_SIZE;
+ }
+ /* Refill the Rx ring buffers. */
+ for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) {
+ struct sk_buff *skb;
+ entry = vp->dirty_rx % RX_RING_SIZE;
+ if (vp->rx_skbuff[entry] == NULL) {
+ skb = dev_alloc_skb(PKT_BUF_SZ);
+ if (skb == NULL) {
+ static unsigned long last_jif;
+ if ((jiffies - last_jif) > 10 * HZ) {
+ printk(KERN_WARNING "%s: memory shortage\n", dev->name);
+ last_jif = jiffies;
+ }
+ if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)
+ mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1));
+ break; /* Bad news! */
+ }
+ skb->dev = dev; /* Mark as being used by this device. */
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+ vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
+ vp->rx_skbuff[entry] = skb;
+ }
+ vp->rx_ring[entry].status = 0; /* Clear complete bit. */
+ outw(UpUnstall, ioaddr + EL3_CMD);
+ }
+ return 0;
+}
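+
+/*
+ * Note on the copybreak strategy above: frames shorter than rx_copybreak
+ * are copied into a freshly allocated skb so the full-sized ring buffer
+ * stays in place; larger frames are handed up directly and the ring slot
+ * is refilled afterwards, with the rx_oom_timer as the fallback when
+ * allocation fails.
+ */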
+
+/*
+ * If we've hit a total OOM refilling the Rx ring we poll once a second
+ * for some memory. Otherwise there is no way to restart the rx process.
+ */
+static void
+rx_oom_timer(unsigned long arg)
+{
+ struct net_device *dev = (struct net_device *)arg;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+
+ spin_lock_irq(&vp->lock);
+ if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */
+ boomerang_rx(dev);
+ if (vortex_debug > 1) {
+ printk(KERN_DEBUG "%s: rx_oom_timer %s\n", dev->name,
+ ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying");
+ }
+ spin_unlock_irq(&vp->lock);
+}
+
+static void
+vortex_down(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ netif_stop_queue (dev);
+
+ del_timer_sync(&vp->rx_oom_timer);
+ del_timer_sync(&vp->timer);
+
+ /* Turn off statistics ASAP. We update vp->stats below. */
+ outw(StatsDisable, ioaddr + EL3_CMD);
+
+ /* Disable the receiver and transmitter. */
+ outw(RxDisable, ioaddr + EL3_CMD);
+ outw(TxDisable, ioaddr + EL3_CMD);
+
+ if (dev->if_port == XCVR_10base2)
+ /* Turn off thinnet power. Green! */
+ outw(StopCoax, ioaddr + EL3_CMD);
+
+ outw(SetIntrEnb | 0x0000, ioaddr + EL3_CMD);
+
+ update_stats(ioaddr, dev);
+ if (vp->full_bus_master_rx)
+ outl(0, ioaddr + UpListPtr);
+ if (vp->full_bus_master_tx)
+ outl(0, ioaddr + DownListPtr);
+
+ if (vp->pdev && vp->enable_wol) {
+ pci_save_state(vp->pdev, vp->power_state);
+ acpi_set_WOL(dev);
+ }
+}
+
+static int
+vortex_close(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int i;
+
+ if (netif_device_present(dev))
+ vortex_down(dev);
+
+ if (vortex_debug > 1) {
+ printk(KERN_DEBUG"%s: vortex_close() status %4.4x, Tx status %2.2x.\n",
+ dev->name, inw(ioaddr + EL3_STATUS), inb(ioaddr + TxStatus));
+ printk(KERN_DEBUG "%s: vortex close stats: rx_nocopy %d rx_copy %d"
+ " tx_queued %d Rx pre-checksummed %d.\n",
+ dev->name, vp->rx_nocopy, vp->rx_copy, vp->queued_packet, vp->rx_csumhits);
+ }
+
+#if DO_ZEROCOPY
+ if ( vp->rx_csumhits &&
+ ((vp->drv_flags & HAS_HWCKSM) == 0) &&
+ (hw_checksums[vp->card_idx] == -1)) {
+ printk(KERN_WARNING "%s supports hardware checksums, and we're not using them!\n", dev->name);
+ printk(KERN_WARNING "Please see http://www.uow.edu.au/~andrewm/zerocopy.html\n");
+ }
+#endif
+
+ free_irq(dev->irq, dev);
+
+ if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */
+ for (i = 0; i < RX_RING_SIZE; i++)
+ if (vp->rx_skbuff[i]) {
+ pci_unmap_single( vp->pdev, le32_to_cpu(vp->rx_ring[i].addr),
+ PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(vp->rx_skbuff[i]);
+ vp->rx_skbuff[i] = 0;
+ }
+ }
+ if (vp->full_bus_master_tx) { /* Free Boomerang bus master Tx buffers. */
+ for (i = 0; i < TX_RING_SIZE; i++) {
+ if (vp->tx_skbuff[i]) {
+ struct sk_buff *skb = vp->tx_skbuff[i];
+#if DO_ZEROCOPY
+ int k;
+
+ for (k=0; k<=skb_shinfo(skb)->nr_frags; k++)
+ pci_unmap_single(vp->pdev,
+ le32_to_cpu(vp->tx_ring[i].frag[k].addr),
+ le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF,
+ PCI_DMA_TODEVICE);
+#else
+ pci_unmap_single(vp->pdev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE);
+#endif
+ dev_kfree_skb(skb);
+ vp->tx_skbuff[i] = 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void
+dump_tx_ring(struct net_device *dev)
+{
+ if (vortex_debug > 0) {
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ if (vp->full_bus_master_tx) {
+ int i;
+ int stalled = inl(ioaddr + PktStatus) & 0x04; /* Possibly racy, but it's only debug stuff. */
+
+ printk(KERN_ERR " Flags; bus-master %d, dirty %d(%d) current %d(%d)\n",
+ vp->full_bus_master_tx,
+ vp->dirty_tx, vp->dirty_tx % TX_RING_SIZE,
+ vp->cur_tx, vp->cur_tx % TX_RING_SIZE);
+ printk(KERN_ERR " Transmit list %8.8x vs. %p.\n",
+ inl(ioaddr + DownListPtr),
+ &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]);
+ issue_and_wait(dev, DownStall);
+ for (i = 0; i < TX_RING_SIZE; i++) {
+ printk(KERN_ERR " %d: @%p length %8.8x status %8.8x\n", i,
+ &vp->tx_ring[i],
+#if DO_ZEROCOPY
+ le32_to_cpu(vp->tx_ring[i].frag[0].length),
+#else
+ le32_to_cpu(vp->tx_ring[i].length),
+#endif
+ le32_to_cpu(vp->tx_ring[i].status));
+ }
+ if (!stalled)
+ outw(DownUnstall, ioaddr + EL3_CMD);
+ }
+ }
+}
+
+static struct net_device_stats *vortex_get_stats(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ unsigned long flags;
+
+ if (netif_device_present(dev)) { /* AKPM: Used to be netif_running */
+ spin_lock_irqsave (&vp->lock, flags);
+ update_stats(dev->base_addr, dev);
+ spin_unlock_irqrestore (&vp->lock, flags);
+ }
+ return &vp->stats;
+}
+
+/* Update statistics.
+ Unlike with the EL3 we need not worry about interrupts changing
+ the window setting from underneath us, but we must still guard
+ against a race condition with a StatsUpdate interrupt updating the
+ table. This is done by checking that the ASM (!) code generated uses
+ atomic updates with '+='.
+ */
+static void update_stats(long ioaddr, struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ int old_window = inw(ioaddr + EL3_CMD);
+
+ if (old_window == 0xffff) /* Chip suspended or ejected. */
+ return;
+ /* Unlike the 3c5x9 we need not turn off stats updates while reading. */
+ /* Switch to the stats window, and read everything. */
+ EL3WINDOW(6);
+ vp->stats.tx_carrier_errors += inb(ioaddr + 0);
+ vp->stats.tx_heartbeat_errors += inb(ioaddr + 1);
+ /* Multiple collisions. */ inb(ioaddr + 2);
+ vp->stats.collisions += inb(ioaddr + 3);
+ vp->stats.tx_window_errors += inb(ioaddr + 4);
+ vp->stats.rx_fifo_errors += inb(ioaddr + 5);
+ vp->stats.tx_packets += inb(ioaddr + 6);
+ vp->stats.tx_packets += (inb(ioaddr + 9)&0x30) << 4;
+ /* Rx packets */ inb(ioaddr + 7); /* Must read to clear */
+ /* Tx deferrals */ inb(ioaddr + 8);
+ /* Don't bother with register 9, an extension of registers 6&7.
+ If we do use the 6&7 values the atomic update assumption above
+ is invalid. */
+ vp->stats.rx_bytes += inw(ioaddr + 10);
+ vp->stats.tx_bytes += inw(ioaddr + 12);
+ /* New: On the Vortex we must also clear the BadSSD counter. */
+ EL3WINDOW(4);
+ inb(ioaddr + 12);
+
+ {
+ u8 up = inb(ioaddr + 13);
+ vp->stats.rx_bytes += (up & 0x0f) << 16;
+ vp->stats.tx_bytes += (up & 0xf0) << 12;
+ }
+
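+ /* The window in use is reported in the top three bits of the
+ status word read at entry, hence the shift by 13. */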
+ EL3WINDOW(old_window >> 13);
+ return;
+}
+
+
+static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr)
+{
+ struct vortex_private *vp = dev->priv;
+ u32 ethcmd;
+
+ if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
+ return -EFAULT;
+
+ switch (ethcmd) {
+ case ETHTOOL_GDRVINFO: {
+ struct ethtool_drvinfo info = {ETHTOOL_GDRVINFO};
+ strcpy(info.driver, DRV_NAME);
+ strcpy(info.version, DRV_VERSION);
+ if (vp->pdev)
+ strcpy(info.bus_info, vp->pdev->slot_name);
+ else
+ sprintf(info.bus_info, "EISA 0x%lx %d",
+ dev->base_addr, dev->irq);
+ if (copy_to_user(useraddr, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+ }
+
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data;
+ int phy = vp->phys[0] & 0x1f;
+ int retval;
+
+ switch(cmd) {
+ case SIOCETHTOOL:
+ return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data);
+
+ case SIOCGMIIPHY: /* Get address of MII PHY in use. */
+ case SIOCDEVPRIVATE: /* for binary compat, remove in 2.5 */
+ data->phy_id = phy;
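+ /* Fall through: also read the requested register. */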
+
+ case SIOCGMIIREG: /* Read MII PHY register. */
+ case SIOCDEVPRIVATE+1: /* for binary compat, remove in 2.5 */
+ EL3WINDOW(4);
+ data->val_out = mdio_read(dev, data->phy_id & 0x1f, data->reg_num & 0x1f);
+ retval = 0;
+ break;
+
+ case SIOCSMIIREG: /* Write MII PHY register. */
+ case SIOCDEVPRIVATE+2: /* for binary compat, remove in 2.5 */
+ if (!capable(CAP_NET_ADMIN)) {
+ retval = -EPERM;
+ } else {
+ EL3WINDOW(4);
+ mdio_write(dev, data->phy_id & 0x1f, data->reg_num & 0x1f, data->val_in);
+ retval = 0;
+ }
+ break;
+ default:
+ retval = -EOPNOTSUPP;
+ break;
+ }
+
+ return retval;
+}
+
+/* Pre-Cyclone chips have no documented multicast filter, so the only
+ multicast setting is to receive all multicast frames. At least
+ the chip has a very clean way to set the mode, unlike many others. */
+static void set_rx_mode(struct net_device *dev)
+{
+ long ioaddr = dev->base_addr;
+ int new_mode;
+
+ if (dev->flags & IFF_PROMISC) {
+ if (vortex_debug > 0)
+ printk(KERN_NOTICE "%s: Setting promiscuous mode.\n", dev->name);
+ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast|RxProm;
+ } else if ((dev->mc_list) || (dev->flags & IFF_ALLMULTI)) {
+ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast;
+ } else
+ new_mode = SetRxFilter | RxStation | RxBroadcast;
+
+ outw(new_mode, ioaddr + EL3_CMD);
+}
+
+/* MII transceiver control section.
+ Read and write the MII registers using software-generated serial
+ MDIO protocol. See the MII specifications or DP83840A data sheet
+ for details. */
+
+/* The maximum data clock rate is 2.5 MHz. The minimum timing is usually
+ met by back-to-back PCI I/O cycles, but we insert a delay to avoid
+ "overclocking" issues. */
+#define mdio_delay() inl(mdio_addr)
+
+#define MDIO_SHIFT_CLK 0x01
+#define MDIO_DIR_WRITE 0x04
+#define MDIO_DATA_WRITE0 (0x00 | MDIO_DIR_WRITE)
+#define MDIO_DATA_WRITE1 (0x02 | MDIO_DIR_WRITE)
+#define MDIO_DATA_READ 0x02
+#define MDIO_ENB_IN 0x00
+
+/* Generate the preamble required for initial synchronization and
+ a few older transceivers. */
+static void mdio_sync(long ioaddr, int bits)
+{
+ long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+
+ /* Establish sync by sending at least 32 logic ones. */
+ while (-- bits >= 0) {
+ outw(MDIO_DATA_WRITE1, mdio_addr);
+ mdio_delay();
+ outw(MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+}
+
+static int mdio_read(struct net_device *dev, int phy_id, int location)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ int i;
+ long ioaddr = dev->base_addr;
+ int read_cmd = (0xf6 << 10) | (phy_id << 5) | location;
+ unsigned int retval = 0;
+ long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+
+ spin_lock_bh(&vp->mdio_lock);
+
+ if (mii_preamble_required)
+ mdio_sync(ioaddr, 32);
+
+ /* Shift the read command bits out. */
+ for (i = 14; i >= 0; i--) {
+ int dataval = (read_cmd&(1<<i)) ? MDIO_DATA_WRITE1 : MDIO_DATA_WRITE0;
+ outw(dataval, mdio_addr);
+ mdio_delay();
+ outw(dataval | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ /* Read the two turnaround bits, 16 data bits, and the wire-idle bit. */
+ for (i = 19; i > 0; i--) {
+ outw(MDIO_ENB_IN, mdio_addr);
+ mdio_delay();
+ retval = (retval << 1) | ((inw(mdio_addr) & MDIO_DATA_READ) ? 1 : 0);
+ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ spin_unlock_bh(&vp->mdio_lock);
+ return (retval & 0x20000) ? 0xffff : (retval >> 1) & 0xffff;
+}
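+
+/* Illustrative sketch (not part of the original driver): mdio_read()
+ * can be used to poll link state through the standard MII BMSR
+ * (register 1), with window 4 selected first, as vortex_ioctl() does:
+ *
+ * EL3WINDOW(4);
+ * bmsr = mdio_read(dev, vp->phys[0] & 0x1f, 1);
+ * link_up = (bmsr & 0x0004) != 0; (bit 2 is the BMSR link-status bit)
+ */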
+
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int write_cmd = 0x50020000 | (phy_id << 23) | (location << 18) | value;
+ long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+ int i;
+
+ spin_lock_bh(&vp->mdio_lock);
+
+ if (mii_preamble_required)
+ mdio_sync(ioaddr, 32);
+
+ /* Shift the command bits out. */
+ for (i = 31; i >= 0; i--) {
+ int dataval = (write_cmd&(1<<i)) ? MDIO_DATA_WRITE1 : MDIO_DATA_WRITE0;
+ outw(dataval, mdio_addr);
+ mdio_delay();
+ outw(dataval | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ /* Leave the interface idle. */
+ for (i = 1; i >= 0; i--) {
+ outw(MDIO_ENB_IN, mdio_addr);
+ mdio_delay();
+ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ spin_unlock_bh(&vp->mdio_lock);
+ return;
+}
+
+/* ACPI: Advanced Configuration and Power Interface. */
+/* Set Wake-On-LAN mode and put the board into D3 (power-down) state. */
+static void acpi_set_WOL(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ /* Power up on: 1==Downloaded Filter, 2==Magic Packets, 4==Link Status. */
+ EL3WINDOW(7);
+ outw(2, ioaddr + 0x0c);
+ /* The RxFilter must accept the WOL frames. */
+ outw(SetRxFilter|RxStation|RxMulticast|RxBroadcast, ioaddr + EL3_CMD);
+ outw(RxEnable, ioaddr + EL3_CMD);
+
+ /* Change the power state to D3; RxEnable doesn't take effect. */
+ pci_enable_wake(vp->pdev, 0, 1);
+ pci_set_power_state(vp->pdev, 3);
+}
+
+
+static void __devexit vortex_remove_one (struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct vortex_private *vp;
+
+ if (!dev) {
+ printk("vortex_remove_one called for EISA device!\n");
+ BUG();
+ }
+
+ vp = dev->priv;
+
+ /* AKPM: FIXME: we should have
+ * if (vp->cb_fn_base) iounmap(vp->cb_fn_base);
+ * here
+ */
+ unregister_netdev(dev);
+ /* Should really use issue_and_wait() here */
+ outw(TotalReset|0x14, dev->base_addr + EL3_CMD);
+
+ if (vp->pdev && vp->enable_wol) {
+ pci_set_power_state(vp->pdev, 0); /* Go active */
+ if (vp->pm_state_valid)
+ pci_restore_state(vp->pdev, vp->power_state);
+ }
+
+ pci_free_consistent(pdev,
+ sizeof(struct boom_rx_desc) * RX_RING_SIZE
+ + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+ vp->rx_ring,
+ vp->rx_ring_dma);
+ if (vp->must_free_region)
+ release_region(dev->base_addr, vp->io_size);
+ kfree(dev);
+}
+
+
+static struct pci_driver vortex_driver = {
+ name: "3c59x",
+ probe: vortex_init_one,
+ remove: __devexit_p(vortex_remove_one),
+ id_table: vortex_pci_tbl,
+#ifdef CONFIG_PM
+ suspend: vortex_suspend,
+ resume: vortex_resume,
+#endif
+};
+
+
+static int vortex_have_pci;
+static int vortex_have_eisa;
+
+
+static int __init vortex_init (void)
+{
+ int pci_rc, eisa_rc;
+
+ pci_rc = pci_module_init(&vortex_driver);
+ eisa_rc = vortex_eisa_init();
+
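+ /* pci_module_init() returns 0 on successful registration, while
+ vortex_eisa_init() is assumed to return the number of EISA boards
+ it found, hence the differing tests below. */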
+ if (pci_rc == 0)
+ vortex_have_pci = 1;
+ if (eisa_rc > 0)
+ vortex_have_eisa = 1;
+
+ return (vortex_have_pci + vortex_have_eisa) ? 0 : -ENODEV;
+}
+
+
+static void __exit vortex_eisa_cleanup (void)
+{
+ struct net_device *dev, *tmp;
+ struct vortex_private *vp;
+ long ioaddr;
+
+ dev = root_vortex_eisa_dev;
+
+ while (dev) {
+ vp = dev->priv;
+ ioaddr = dev->base_addr;
+
+ unregister_netdev (dev);
+ outw (TotalReset, ioaddr + EL3_CMD);
+ release_region (ioaddr, VORTEX_TOTAL_SIZE);
+
+ tmp = dev;
+ dev = vp->next_module;
+
+ kfree (tmp);
+ }
+}
+
+
+static void __exit vortex_cleanup (void)
+{
+ if (vortex_have_pci)
+ pci_unregister_driver (&vortex_driver);
+ if (vortex_have_eisa)
+ vortex_eisa_cleanup ();
+}
+
+
+module_init(vortex_init);
+module_exit(vortex_cleanup);
diff --git a/xen/drivers/net/Makefile b/xen/drivers/net/Makefile
new file mode 100644
index 0000000000..34954de493
--- /dev/null
+++ b/xen/drivers/net/Makefile
@@ -0,0 +1,13 @@
+
+include $(BASEDIR)/Rules.mk
+
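+# ld -r emits one relocatable driver.o combining the generic objects
+# with the ne and e1000 sub-drivers for the Xen link.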
+default: $(OBJS)
+ $(MAKE) -C ne
+ $(MAKE) -C e1000
+ $(LD) -r -o driver.o e1000/e1000.o $(OBJS) ne/ne_drv.o
+
+clean:
+ $(MAKE) -C ne clean
+ $(MAKE) -C e1000 clean
+ rm -f *.o *~ core
+
+.PHONY: default clean
diff --git a/xen/drivers/net/Space.c b/xen/drivers/net/Space.c
new file mode 100644
index 0000000000..5724837106
--- /dev/null
+++ b/xen/drivers/net/Space.c
@@ -0,0 +1,44 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Holds initial configuration information for devices.
+ *
+ * Version: @(#)Space.c 1.0.7 08/12/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Donald J. Becker, <becker@scyld.com>
+ */
+#include <linux/config.h>
+#include <linux/netdevice.h>
+
+/*
+ * KAF (23/7/02): All the probe code is gone from here -- each network
+ * driver should probe as part of its setup, and dynamically append
+ * to dev_base when it finds a NIC.
+ */
+
+/*
+ * The @dev_base list is protected by @dev_base_lock and the rtnl
+ * semaphore.
+ *
+ * Pure readers hold dev_base_lock for reading.
+ *
+ * Writers must hold the rtnl semaphore while they loop through the
+ * dev_base list, and hold dev_base_lock for writing when they do the
+ * actual updates. This allows pure readers to access the list even
+ * while a writer is preparing to update it.
+ *
+ * To put it another way, dev_base_lock is held for writing only to
+ * protect against pure readers; the rtnl semaphore provides the
+ * protection against other writers.
+ *
+ * See, for example usages, register_netdevice() and
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
+struct net_device *dev_base = NULL;
+rwlock_t dev_base_lock = RW_LOCK_UNLOCKED;
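+
+/* Illustrative sketch of the rules above (not part of this file).
+ * A pure reader needs only dev_base_lock:
+ *
+ * read_lock(&dev_base_lock);
+ * for (dev = dev_base; dev; dev = dev->next)
+ * ... inspect dev ...;
+ * read_unlock(&dev_base_lock);
+ *
+ * A writer holds the rtnl semaphore across the whole operation and
+ * takes write_lock(&dev_base_lock) only around the list update, as
+ * register_netdevice() and unregister_netdevice() do.
+ */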
+
diff --git a/xen/drivers/net/e1000/LICENSE b/xen/drivers/net/e1000/LICENSE
new file mode 100644
index 0000000000..5f297e5bb4
--- /dev/null
+++ b/xen/drivers/net/e1000/LICENSE
@@ -0,0 +1,339 @@
+
+"This software program is licensed subject to the GNU General Public License
+(GPL). Version 2, June 1991, available at
+<http://www.fsf.org/copyleft/gpl.html>"
+
+GNU General Public License
+
+Version 2, June 1991
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to
+share and change it. By contrast, the GNU General Public License is intended
+to guarantee your freedom to share and change free software--to make sure
+the software is free for all its users. This General Public License applies
+to most of the Free Software Foundation's software and to any other program
+whose authors commit to using it. (Some other Free Software Foundation
+software is covered by the GNU Library General Public License instead.) You
+can apply it to your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our
+General Public Licenses are designed to make sure that you have the freedom
+to distribute copies of free software (and charge for this service if you
+wish), that you receive source code or can get it if you want it, that you
+can change the software or use pieces of it in new free programs; and that
+you know you can do these things.
+
+To protect your rights, we need to make restrictions that forbid anyone to
+deny you these rights or to ask you to surrender the rights. These
+restrictions translate to certain responsibilities for you if you distribute
+copies of the software, or if you modify it.
+
+For example, if you distribute copies of such a program, whether gratis or
+for a fee, you must give the recipients all the rights that you have. You
+must make sure that they, too, receive or can get the source code. And you
+must show them these terms so they know their rights.
+
+We protect your rights with two steps: (1) copyright the software, and (2)
+offer you this license which gives you legal permission to copy, distribute
+and/or modify the software.
+
+Also, for each author's protection and ours, we want to make certain that
+everyone understands that there is no warranty for this free software. If
+the software is modified by someone else and passed on, we want its
+recipients to know that what they have is not the original, so that any
+problems introduced by others will not reflect on the original authors'
+reputations.
+
+Finally, any free program is threatened constantly by software patents. We
+wish to avoid the danger that redistributors of a free program will
+individually obtain patent licenses, in effect making the program
+proprietary. To prevent this, we have made it clear that any patent must be
+licensed for everyone's free use or not licensed at all.
+
+The precise terms and conditions for copying, distribution and modification
+follow.
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License applies to any program or other work which contains a notice
+ placed by the copyright holder saying it may be distributed under the
+ terms of this General Public License. The "Program", below, refers to any
+ such program or work, and a "work based on the Program" means either the
+ Program or any derivative work under copyright law: that is to say, a
+ work containing the Program or a portion of it, either verbatim or with
+ modifications and/or translated into another language. (Hereinafter,
+ translation is included without limitation in the term "modification".)
+ Each licensee is addressed as "you".
+
+ Activities other than copying, distribution and modification are not
+ covered by this License; they are outside its scope. The act of running
+ the Program is not restricted, and the output from the Program is covered
+ only if its contents constitute a work based on the Program (independent
+ of having been made by running the Program). Whether that is true depends
+ on what the Program does.
+
+1. You may copy and distribute verbatim copies of the Program's source code
+ as you receive it, in any medium, provided that you conspicuously and
+ appropriately publish on each copy an appropriate copyright notice and
+ disclaimer of warranty; keep intact all the notices that refer to this
+ License and to the absence of any warranty; and give any other recipients
+ of the Program a copy of this License along with the Program.
+
+ You may charge a fee for the physical act of transferring a copy, and you
+ may at your option offer warranty protection in exchange for a fee.
+
+2. You may modify your copy or copies of the Program or any portion of it,
+ thus forming a work based on the Program, and copy and distribute such
+ modifications or work under the terms of Section 1 above, provided that
+ you also meet all of these conditions:
+
+ * a) You must cause the modified files to carry prominent notices stating
+ that you changed the files and the date of any change.
+
+ * b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any part
+ thereof, to be licensed as a whole at no charge to all third parties
+ under the terms of this License.
+
+ * c) If the modified program normally reads commands interactively when
+ run, you must cause it, when started running for such interactive
+ use in the most ordinary way, to print or display an announcement
+ including an appropriate copyright notice and a notice that there is
+ no warranty (or else, saying that you provide a warranty) and that
+ users may redistribute the program under these conditions, and
+ telling the user how to view a copy of this License. (Exception: if
+ the Program itself is interactive but does not normally print such
+ an announcement, your work based on the Program is not required to
+ print an announcement.)
+
+ These requirements apply to the modified work as a whole. If identifiable
+ sections of that work are not derived from the Program, and can be
+ reasonably considered independent and separate works in themselves, then
+ this License, and its terms, do not apply to those sections when you
+ distribute them as separate works. But when you distribute the same
+ sections as part of a whole which is a work based on the Program, the
+ distribution of the whole must be on the terms of this License, whose
+ permissions for other licensees extend to the entire whole, and thus to
+ each and every part regardless of who wrote it.
+
+ Thus, it is not the intent of this section to claim rights or contest
+ your rights to work written entirely by you; rather, the intent is to
+ exercise the right to control the distribution of derivative or
+ collective works based on the Program.
+
+ In addition, mere aggregation of another work not based on the Program
+ with the Program (or with a work based on the Program) on a volume of a
+ storage or distribution medium does not bring the other work under the
+ scope of this License.
+
+3. You may copy and distribute the Program (or a work based on it, under
+ Section 2) in object code or executable form under the terms of Sections
+ 1 and 2 above provided that you also do one of the following:
+
+ * a) Accompany it with the complete corresponding machine-readable source
+ code, which must be distributed under the terms of Sections 1 and 2
+ above on a medium customarily used for software interchange; or,
+
+ * b) Accompany it with a written offer, valid for at least three years,
+ to give any third party, for a charge no more than your cost of
+ physically performing source distribution, a complete machine-
+ readable copy of the corresponding source code, to be distributed
+ under the terms of Sections 1 and 2 above on a medium customarily
+ used for software interchange; or,
+
+ * c) Accompany it with the information you received as to the offer to
+ distribute corresponding source code. (This alternative is allowed
+ only for noncommercial distribution and only if you received the
+ program in object code or executable form with such an offer, in
+ accord with Subsection b above.)
+
+ The source code for a work means the preferred form of the work for
+ making modifications to it. For an executable work, complete source code
+ means all the source code for all modules it contains, plus any
+ associated interface definition files, plus the scripts used to control
+ compilation and installation of the executable. However, as a special
+ exception, the source code distributed need not include anything that is
+ normally distributed (in either source or binary form) with the major
+ components (compiler, kernel, and so on) of the operating system on which
+ the executable runs, unless that component itself accompanies the
+ executable.
+
+ If distribution of executable or object code is made by offering access
+ to copy from a designated place, then offering equivalent access to copy
+ the source code from the same place counts as distribution of the source
+ code, even though third parties are not compelled to copy the source
+ along with the object code.
+
+4. You may not copy, modify, sublicense, or distribute the Program except as
+ expressly provided under this License. Any attempt otherwise to copy,
+ modify, sublicense or distribute the Program is void, and will
+ automatically terminate your rights under this License. However, parties
+ who have received copies, or rights, from you under this License will not
+ have their licenses terminated so long as such parties remain in full
+ compliance.
+
+5. You are not required to accept this License, since you have not signed
+ it. However, nothing else grants you permission to modify or distribute
+ the Program or its derivative works. These actions are prohibited by law
+ if you do not accept this License. Therefore, by modifying or
+ distributing the Program (or any work based on the Program), you
+ indicate your acceptance of this License to do so, and all its terms and
+ conditions for copying, distributing or modifying the Program or works
+ based on it.
+
+6. Each time you redistribute the Program (or any work based on the
+ Program), the recipient automatically receives a license from the
+ original licensor to copy, distribute or modify the Program subject to
+ these terms and conditions. You may not impose any further restrictions
+ on the recipients' exercise of the rights granted herein. You are not
+ responsible for enforcing compliance by third parties to this License.
+
+7. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent issues),
+ conditions are imposed on you (whether by court order, agreement or
+ otherwise) that contradict the conditions of this License, they do not
+ excuse you from the conditions of this License. If you cannot distribute
+ so as to satisfy simultaneously your obligations under this License and
+ any other pertinent obligations, then as a consequence you may not
+ distribute the Program at all. For example, if a patent license would
+ not permit royalty-free redistribution of the Program by all those who
+ receive copies directly or indirectly through you, then the only way you
+ could satisfy both it and this License would be to refrain entirely from
+ distribution of the Program.
+
+ If any portion of this section is held invalid or unenforceable under any
+ particular circumstance, the balance of the section is intended to apply
+ and the section as a whole is intended to apply in other circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of any
+ such claims; this section has the sole purpose of protecting the
+ integrity of the free software distribution system, which is implemented
+ by public license practices. Many people have made generous contributions
+ to the wide range of software distributed through that system in
+ reliance on consistent application of that system; it is up to the
+ author/donor to decide if he or she is willing to distribute software
+ through any other system and a licensee cannot impose that choice.
+
+ This section is intended to make thoroughly clear what is believed to be
+ a consequence of the rest of this License.
+
+8. If the distribution and/or use of the Program is restricted in certain
+ countries either by patents or by copyrighted interfaces, the original
+ copyright holder who places the Program under this License may add an
+ explicit geographical distribution limitation excluding those countries,
+ so that distribution is permitted only in or among countries not thus
+ excluded. In such case, this License incorporates the limitation as if
+ written in the body of this License.
+
+9. The Free Software Foundation may publish revised and/or new versions of
+ the General Public License from time to time. Such new versions will be
+ similar in spirit to the present version, but may differ in detail to
+ address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the Program
+ specifies a version number of this License which applies to it and "any
+ later version", you have the option of following the terms and
+ conditions either of that version or of any later version published by
+ the Free Software Foundation. If the Program does not specify a version
+ number of this License, you may choose any version ever published by the
+ Free Software Foundation.
+
+10. If you wish to incorporate parts of the Program into other free programs
+ whose distribution conditions are different, write to the author to ask
+ for permission. For software which is copyrighted by the Free Software
+ Foundation, write to the Free Software Foundation; we sometimes make
+ exceptions for this. Our decision will be guided by the two goals of
+ preserving the free status of all derivatives of our free software and
+ of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+ EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH
+ YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+ NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
+ DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
+ DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM
+ (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
+ INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+ THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR
+ OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it free
+software which everyone can redistribute and change under these terms.
+
+To do so, attach the following notices to the program. It is safest to
+attach them to the start of each source file to most effectively convey the
+exclusion of warranty; and each file should have at least the "copyright"
+line and a pointer to where the full notice is found.
+
+one line to give the program's name and an idea of what it does.
+Copyright (C) yyyy name of author
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this when
+it starts in an interactive mode:
+
+Gnomovision version 69, Copyright (C) year name of author Gnomovision comes
+with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free
+software, and you are welcome to redistribute it under certain conditions;
+type 'show c' for details.
+
+The hypothetical commands 'show w' and 'show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may be
+called something other than 'show w' and 'show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+'Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+signature of Ty Coon, 1 April 1989
+Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General Public
+License instead of this License.
diff --git a/xen/drivers/net/e1000/Makefile b/xen/drivers/net/e1000/Makefile
new file mode 100644
index 0000000000..f262fcf32b
--- /dev/null
+++ b/xen/drivers/net/e1000/Makefile
@@ -0,0 +1,39 @@
+################################################################################
+#
+#
+# Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# The full GNU General Public License is included in this distribution in the
+# file called LICENSE.
+#
+# Contact Information:
+# Linux NICS <linux.nics@intel.com>
+# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+#
+################################################################################
+
+#
+# Makefile for the Intel(R) PRO/1000 ethernet driver
+#
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o e1000.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/net/e1000/e1000.h b/xen/drivers/net/e1000/e1000.h
new file mode 100644
index 0000000000..d94e390ba3
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000.h
@@ -0,0 +1,208 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+/* Linux PRO/1000 Ethernet Driver main header file */
+
+#ifndef _E1000_H_
+#define _E1000_H_
+
+//#include <linux/stddef.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+//#include <linux/string.h>
+//#include <linux/pagemap.h>
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+//#include <linux/capability.h>
+#include <linux/in.h>
+//#include <linux/ip.h>
+//#include <linux/tcp.h>
+//#include <linux/udp.h>
+//#include <net/pkt_sched.h>
+#include <linux/list.h>
+#include <linux/reboot.h>
+#include <linux/tqueue.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+
+#define BAR_0 0
+#define BAR_1 1
+#define BAR_5 5
+#define PCI_DMA_64BIT 0xffffffffffffffffULL
+#define PCI_DMA_32BIT 0x00000000ffffffffULL
+
+
+struct e1000_adapter;
+
+// XEN XXX
+// #define DBG 1
+
+#include "e1000_hw.h"
+
+#if DBG
+#define E1000_DBG(args...) printk(KERN_DEBUG "e1000: " args)
+#else
+#define E1000_DBG(args...)
+#endif
+
+#define E1000_ERR(args...) printk(KERN_ERR "e1000: " args)
+
+#define E1000_MAX_INTR 10
+
+/* Supported Rx Buffer Sizes */
+#define E1000_RXBUFFER_2048 2048
+#define E1000_RXBUFFER_4096 4096
+#define E1000_RXBUFFER_8192 8192
+#define E1000_RXBUFFER_16384 16384
+
+/* Flow Control High-Watermark: 43464 bytes */
+#define E1000_FC_HIGH_THRESH 0xA9C8
+
+/* Flow Control Low-Watermark: 43456 bytes */
+#define E1000_FC_LOW_THRESH 0xA9C0
+
+/* Flow Control Pause Time: 858 usec */
+#define E1000_FC_PAUSE_TIME 0x0680
+
+/* How many Tx Descriptors do we need to call netif_wake_queue ? */
+#define E1000_TX_QUEUE_WAKE 16
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define E1000_RX_BUFFER_WRITE 16
+
+#define E1000_JUMBO_PBA 0x00000028
+#define E1000_DEFAULT_PBA 0x00000030
+
+#define AUTO_ALL_MODES 0
+#define E1000_EEPROM_APME 4
+
+/* only works for sizes that are powers of 2 */
+#define E1000_ROUNDUP(i, size) ((i) = (((i) + (size) - 1) & ~((size) - 1)))
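+/* For example, E1000_ROUNDUP(i, 16) with i == 100 assigns
+ (100 + 15) & ~15 == 112; a size that is not a power of 2 would
+ make the mask trick round incorrectly. */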
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer */
+struct e1000_buffer {
+ struct sk_buff *skb;
+ uint64_t dma;
+ unsigned long length;
+ unsigned long time_stamp;
+};
+
+struct e1000_desc_ring {
+ /* pointer to the descriptor ring memory */
+ void *desc;
+ /* physical address of the descriptor ring */
+ dma_addr_t dma;
+ /* length of descriptor ring in bytes */
+ unsigned int size;
+ /* number of descriptors in the ring */
+ unsigned int count;
+ /* next descriptor to associate a buffer with */
+ unsigned int next_to_use;
+ /* next descriptor to check for DD status bit */
+ unsigned int next_to_clean;
+ /* array of buffer information structs */
+ struct e1000_buffer *buffer_info;
+};
+
+#define E1000_DESC_UNUSED(R) \
+((((R)->next_to_clean + (R)->count) - ((R)->next_to_use + 1)) % ((R)->count))
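+/* For example, with count == 256, next_to_use == 10 and
+ next_to_clean == 4 this gives ((4 + 256) - 11) % 256 == 249 unused
+ descriptors; the "+ count ... % count" keeps the result
+ non-negative across ring wrap-around. */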
+
+#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i]))
+#define E1000_RX_DESC(R, i) E1000_GET_DESC(R, i, e1000_rx_desc)
+#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc)
+#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc)
+
+/* board specific private data structure */
+
+struct e1000_adapter {
+ struct timer_list watchdog_timer;
+ struct timer_list phy_info_timer;
+ struct vlan_group *vlgrp;
+ char *id_string;
+ uint32_t bd_number;
+ uint32_t rx_buffer_len;
+ uint32_t part_num;
+ uint32_t wol;
+ uint16_t link_speed;
+ uint16_t link_duplex;
+ spinlock_t stats_lock;
+ atomic_t irq_sem;
+ struct tq_struct tx_timeout_task;
+
+ struct timer_list blink_timer;
+ unsigned long led_status;
+
+ /* TX */
+ struct e1000_desc_ring tx_ring;
+ uint32_t txd_cmd;
+ uint32_t tx_int_delay;
+ uint32_t tx_abs_int_delay;
+ int max_data_per_txd;
+
+ /* RX */
+ struct e1000_desc_ring rx_ring;
+ uint64_t hw_csum_err;
+ uint64_t hw_csum_good;
+ uint32_t rx_int_delay;
+ uint32_t rx_abs_int_delay;
+ boolean_t rx_csum;
+
+ /* OS defined structs */
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+ struct net_device_stats net_stats;
+
+ /* structs defined in e1000_hw.h */
+ struct e1000_hw hw;
+ struct e1000_hw_stats stats;
+ struct e1000_phy_info phy_info;
+ struct e1000_phy_stats phy_stats;
+
+
+
+ uint32_t pci_state[16];
+ char ifname[IFNAMSIZ];
+};
+#endif /* _E1000_H_ */
diff --git a/xen/drivers/net/e1000/e1000_ethtool.c b/xen/drivers/net/e1000/e1000_ethtool.c
new file mode 100644
index 0000000000..d06ef79c6e
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_ethtool.c
@@ -0,0 +1,611 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* ethtool support for e1000 */
+
+#include "e1000.h"
+
+#include <asm/uaccess.h>
+
+extern char e1000_driver_name[];
+extern char e1000_driver_version[];
+
+extern int e1000_up(struct e1000_adapter *adapter);
+extern void e1000_down(struct e1000_adapter *adapter);
+extern void e1000_reset(struct e1000_adapter *adapter);
+
+static char e1000_gstrings_stats[][ETH_GSTRING_LEN] = {
+ "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
+ "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
+ "rx_length_errors", "rx_over_errors", "rx_crc_errors",
+ "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors",
+ "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors",
+ "tx_heartbeat_errors", "tx_window_errors",
+};
+#define E1000_STATS_LEN (sizeof(e1000_gstrings_stats) / ETH_GSTRING_LEN)
+
+static void
+e1000_ethtool_gset(struct e1000_adapter *adapter, struct ethtool_cmd *ecmd)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ if(hw->media_type == e1000_media_type_copper) {
+
+ ecmd->supported = (SUPPORTED_10baseT_Half |
+ SUPPORTED_10baseT_Full |
+ SUPPORTED_100baseT_Half |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_1000baseT_Full|
+ SUPPORTED_Autoneg |
+ SUPPORTED_TP);
+
+ ecmd->advertising = ADVERTISED_TP;
+
+ if(hw->autoneg == 1) {
+ ecmd->advertising |= ADVERTISED_Autoneg;
+
+ /* the e1000 autoneg seems to match ethtool nicely */
+
+ ecmd->advertising |= hw->autoneg_advertised;
+ }
+
+ ecmd->port = PORT_TP;
+ ecmd->phy_address = hw->phy_addr;
+
+ if(hw->mac_type == e1000_82543)
+ ecmd->transceiver = XCVR_EXTERNAL;
+ else
+ ecmd->transceiver = XCVR_INTERNAL;
+
+ } else {
+ ecmd->supported = (SUPPORTED_1000baseT_Full |
+ SUPPORTED_FIBRE |
+ SUPPORTED_Autoneg);
+
+ ecmd->advertising = (ADVERTISED_1000baseT_Full |
+ ADVERTISED_FIBRE |
+ ADVERTISED_Autoneg);
+
+ ecmd->port = PORT_FIBRE;
+
+ if(hw->mac_type >= e1000_82545)
+ ecmd->transceiver = XCVR_INTERNAL;
+ else
+ ecmd->transceiver = XCVR_EXTERNAL;
+ }
+
+ if(netif_carrier_ok(adapter->netdev)) {
+
+ e1000_get_speed_and_duplex(hw, &adapter->link_speed,
+ &adapter->link_duplex);
+ ecmd->speed = adapter->link_speed;
+
+ /* unfortunately FULL_DUPLEX != DUPLEX_FULL
+ * and HALF_DUPLEX != DUPLEX_HALF */
+
+ if(adapter->link_duplex == FULL_DUPLEX)
+ ecmd->duplex = DUPLEX_FULL;
+ else
+ ecmd->duplex = DUPLEX_HALF;
+ } else {
+ ecmd->speed = -1;
+ ecmd->duplex = -1;
+ }
+
+ ecmd->autoneg = (hw->autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+}
+
+static int
+e1000_ethtool_sset(struct e1000_adapter *adapter, struct ethtool_cmd *ecmd)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ if(ecmd->autoneg == AUTONEG_ENABLE) {
+ hw->autoneg = 1;
+ hw->autoneg_advertised = 0x002F;
+ ecmd->advertising = 0x002F;
+ } else {
+ hw->autoneg = 0;
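+ /* SPEED_* are 10/100/1000 and DUPLEX_* are 0/1, so each
+ speed+duplex sum below is distinct. */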
+ switch(ecmd->speed + ecmd->duplex) {
+ case SPEED_10 + DUPLEX_HALF:
+ hw->forced_speed_duplex = e1000_10_half;
+ break;
+ case SPEED_10 + DUPLEX_FULL:
+ hw->forced_speed_duplex = e1000_10_full;
+ break;
+ case SPEED_100 + DUPLEX_HALF:
+ hw->forced_speed_duplex = e1000_100_half;
+ break;
+ case SPEED_100 + DUPLEX_FULL:
+ hw->forced_speed_duplex = e1000_100_full;
+ break;
+ case SPEED_1000 + DUPLEX_FULL:
+ hw->autoneg = 1;
+ hw->autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+ case SPEED_1000 + DUPLEX_HALF: /* not supported */
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* reset the link */
+
+ if(netif_running(adapter->netdev)) {
+ e1000_down(adapter);
+ e1000_up(adapter);
+ } else
+ e1000_reset(adapter);
+
+ return 0;
+}
+
+static inline int
+e1000_eeprom_size(struct e1000_hw *hw)
+{
+ if((hw->mac_type > e1000_82544) &&
+ (E1000_READ_REG(hw, EECD) & E1000_EECD_SIZE))
+ return 512;
+ else
+ return 128;
+}
+
+static void
+e1000_ethtool_gdrvinfo(struct e1000_adapter *adapter,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strncpy(drvinfo->driver, e1000_driver_name, 32);
+ strncpy(drvinfo->version, e1000_driver_version, 32);
+ strncpy(drvinfo->fw_version, "N/A", 32);
+ strncpy(drvinfo->bus_info, adapter->pdev->slot_name, 32);
+ drvinfo->n_stats = E1000_STATS_LEN;
+#define E1000_REGS_LEN 32
+ drvinfo->regdump_len = E1000_REGS_LEN * sizeof(uint32_t);
+ drvinfo->eedump_len = e1000_eeprom_size(&adapter->hw);
+}
+
+static void
+e1000_ethtool_gregs(struct e1000_adapter *adapter,
+ struct ethtool_regs *regs, uint32_t *regs_buff)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id;
+
+ regs_buff[0] = E1000_READ_REG(hw, CTRL);
+ regs_buff[1] = E1000_READ_REG(hw, STATUS);
+
+ regs_buff[2] = E1000_READ_REG(hw, RCTL);
+ regs_buff[3] = E1000_READ_REG(hw, RDLEN);
+ regs_buff[4] = E1000_READ_REG(hw, RDH);
+ regs_buff[5] = E1000_READ_REG(hw, RDT);
+ regs_buff[6] = E1000_READ_REG(hw, RDTR);
+
+ regs_buff[7] = E1000_READ_REG(hw, TCTL);
+ regs_buff[8] = E1000_READ_REG(hw, TDLEN);
+ regs_buff[9] = E1000_READ_REG(hw, TDH);
+ regs_buff[10] = E1000_READ_REG(hw, TDT);
+ regs_buff[11] = E1000_READ_REG(hw, TIDV);
+
+ return;
+}
+
+static int
+e1000_ethtool_geeprom(struct e1000_adapter *adapter,
+ struct ethtool_eeprom *eeprom, uint16_t *eeprom_buff)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int max_len, first_word, last_word;
+ int ret_val = 0;
+ int i;
+
+ if(eeprom->len == 0) {
+ ret_val = -EINVAL;
+ goto geeprom_error;
+ }
+
+ eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+ max_len = e1000_eeprom_size(hw);
+
+ if(eeprom->offset > eeprom->offset + eeprom->len) {
+ ret_val = -EINVAL;
+ goto geeprom_error;
+ }
+
+ if((eeprom->offset + eeprom->len) > max_len)
+ eeprom->len = (max_len - eeprom->offset);
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+ for(i = 0; i <= (last_word - first_word); i++)
+ e1000_read_eeprom(hw, first_word + i, &eeprom_buff[i]);
+
+geeprom_error:
+ return ret_val;
+}
+
+static int
+e1000_ethtool_seeprom(struct e1000_adapter *adapter,
+ struct ethtool_eeprom *eeprom, void *user_data)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ uint16_t *eeprom_buff;
+ int max_len, first_word, last_word;
+ void *ptr;
+ int i;
+
+ if(eeprom->len == 0)
+ return -EOPNOTSUPP;
+
+ if(eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+ return -EFAULT;
+
+ max_len = e1000_eeprom_size(hw);
+
+ if((eeprom->offset + eeprom->len) > max_len)
+ eeprom->len = (max_len - eeprom->offset);
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+ eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+ if(eeprom_buff == NULL)
+ return -ENOMEM;
+
+ ptr = (void *)eeprom_buff;
+
+ if(eeprom->offset & 1) {
+ /* need read/modify/write of first changed EEPROM word */
+ /* only the second byte of the word is being modified */
+ e1000_read_eeprom(hw, first_word, &eeprom_buff[0]);
+ ptr++;
+ }
+ if((eeprom->offset + eeprom->len) & 1) {
+ /* need read/modify/write of last changed EEPROM word */
+ /* only the first byte of the word is being modified */
+ e1000_read_eeprom(hw, last_word,
+ &eeprom_buff[last_word - first_word]);
+ }
+ if(copy_from_user(ptr, user_data, eeprom->len)) {
+ kfree(eeprom_buff);
+ return -EFAULT;
+ }
+
+ for(i = 0; i <= (last_word - first_word); i++)
+ e1000_write_eeprom(hw, first_word + i, eeprom_buff[i]);
+
+ /* Update the checksum over the first part of the EEPROM if needed */
+ if(first_word <= EEPROM_CHECKSUM_REG)
+ e1000_update_eeprom_checksum(hw);
+
+ kfree(eeprom_buff);
+
+ return 0;
+}
+
+static void
+e1000_ethtool_gwol(struct e1000_adapter *adapter, struct ethtool_wolinfo *wol)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ switch(adapter->hw.device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ wol->supported = 0;
+ wol->wolopts = 0;
+ return;
+
+ case E1000_DEV_ID_82546EB_FIBER:
+ /* Wake events only supported on port A for dual fiber */
+ if(E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1) {
+ wol->supported = 0;
+ wol->wolopts = 0;
+ return;
+ }
+ /* Fall Through */
+
+ default:
+ wol->supported = WAKE_UCAST | WAKE_MCAST
+ | WAKE_BCAST | WAKE_MAGIC;
+
+ wol->wolopts = 0;
+ if(adapter->wol & E1000_WUFC_EX)
+ wol->wolopts |= WAKE_UCAST;
+ if(adapter->wol & E1000_WUFC_MC)
+ wol->wolopts |= WAKE_MCAST;
+ if(adapter->wol & E1000_WUFC_BC)
+ wol->wolopts |= WAKE_BCAST;
+ if(adapter->wol & E1000_WUFC_MAG)
+ wol->wolopts |= WAKE_MAGIC;
+ return;
+ }
+}
+
+static int
+e1000_ethtool_swol(struct e1000_adapter *adapter, struct ethtool_wolinfo *wol)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ switch(adapter->hw.device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ return wol->wolopts ? -EOPNOTSUPP : 0;
+
+ case E1000_DEV_ID_82546EB_FIBER:
+ /* Wake events only supported on port A for dual fiber */
+ if(E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)
+ return wol->wolopts ? -EOPNOTSUPP : 0;
+ /* Fall Through */
+
+ default:
+ if(wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE | WAKE_PHY))
+ return -EOPNOTSUPP;
+
+ adapter->wol = 0;
+
+ if(wol->wolopts & WAKE_UCAST)
+ adapter->wol |= E1000_WUFC_EX;
+ if(wol->wolopts & WAKE_MCAST)
+ adapter->wol |= E1000_WUFC_MC;
+ if(wol->wolopts & WAKE_BCAST)
+ adapter->wol |= E1000_WUFC_BC;
+ if(wol->wolopts & WAKE_MAGIC)
+ adapter->wol |= E1000_WUFC_MAG;
+ }
+
+ return 0;
+}
+
+
+/* toggle LED 4 times per second = 2 "blinks" per second */
+#define E1000_ID_INTERVAL (HZ/4)
+
+/* bit defines for adapter->led_status */
+#define E1000_LED_ON 0
+
+static void
+e1000_led_blink_callback(unsigned long data)
+{
+ struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+
+ if(test_and_change_bit(E1000_LED_ON, &adapter->led_status))
+ e1000_led_off(&adapter->hw);
+ else
+ e1000_led_on(&adapter->hw);
+
+ mod_timer(&adapter->blink_timer, jiffies + E1000_ID_INTERVAL);
+}
+
+static int
+e1000_ethtool_led_blink(struct e1000_adapter *adapter, struct ethtool_value *id)
+{
+ if(!adapter->blink_timer.function) {
+ init_timer(&adapter->blink_timer);
+ adapter->blink_timer.function = e1000_led_blink_callback;
+ adapter->blink_timer.data = (unsigned long) adapter;
+ }
+
+ e1000_setup_led(&adapter->hw);
+ mod_timer(&adapter->blink_timer, jiffies);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if(id->data)
+ schedule_timeout(id->data * HZ);
+ else
+ schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+
+ del_timer_sync(&adapter->blink_timer);
+ e1000_led_off(&adapter->hw);
+ clear_bit(E1000_LED_ON, &adapter->led_status);
+ e1000_cleanup_led(&adapter->hw);
+
+ return 0;
+}
+
+int
+e1000_ethtool_ioctl(struct net_device *netdev, struct ifreq *ifr)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ void *addr = ifr->ifr_data;
+ uint32_t cmd;
+
+ if(get_user(cmd, (uint32_t *) addr))
+ return -EFAULT;
+
+ switch(cmd) {
+ case ETHTOOL_GSET: {
+ struct ethtool_cmd ecmd = {ETHTOOL_GSET};
+ e1000_ethtool_gset(adapter, &ecmd);
+ if(copy_to_user(addr, &ecmd, sizeof(ecmd)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SSET: {
+ struct ethtool_cmd ecmd;
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if(copy_from_user(&ecmd, addr, sizeof(ecmd)))
+ return -EFAULT;
+ return e1000_ethtool_sset(adapter, &ecmd);
+ }
+ case ETHTOOL_GDRVINFO: {
+ struct ethtool_drvinfo drvinfo = {ETHTOOL_GDRVINFO};
+ e1000_ethtool_gdrvinfo(adapter, &drvinfo);
+ if(copy_to_user(addr, &drvinfo, sizeof(drvinfo)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_GSTRINGS: {
+ struct ethtool_gstrings gstrings = { ETHTOOL_GSTRINGS };
+ char *strings = NULL;
+
+ if(copy_from_user(&gstrings, addr, sizeof(gstrings)))
+ return -EFAULT;
+ switch(gstrings.string_set) {
+ case ETH_SS_STATS:
+ gstrings.len = E1000_STATS_LEN;
+ strings = *e1000_gstrings_stats;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ if(copy_to_user(addr, &gstrings, sizeof(gstrings)))
+ return -EFAULT;
+ addr += offsetof(struct ethtool_gstrings, data);
+ if(copy_to_user(addr, strings,
+ gstrings.len * ETH_GSTRING_LEN))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_GREGS: {
+ struct ethtool_regs regs = {ETHTOOL_GREGS};
+ uint32_t regs_buff[E1000_REGS_LEN];
+
+ if(copy_from_user(&regs, addr, sizeof(regs)))
+ return -EFAULT;
+ e1000_ethtool_gregs(adapter, &regs, regs_buff);
+ if(copy_to_user(addr, &regs, sizeof(regs)))
+ return -EFAULT;
+
+ addr += offsetof(struct ethtool_regs, data);
+ if(copy_to_user(addr, regs_buff, regs.len))
+ return -EFAULT;
+
+ return 0;
+ }
+ case ETHTOOL_NWAY_RST: {
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if(netif_running(netdev)) {
+ e1000_down(adapter);
+ e1000_up(adapter);
+ }
+ return 0;
+ }
+ case ETHTOOL_PHYS_ID: {
+ struct ethtool_value id;
+ if(copy_from_user(&id, addr, sizeof(id)))
+ return -EFAULT;
+ return e1000_ethtool_led_blink(adapter, &id);
+ }
+ case ETHTOOL_GLINK: {
+ struct ethtool_value link = {ETHTOOL_GLINK};
+ link.data = netif_carrier_ok(netdev);
+ if(copy_to_user(addr, &link, sizeof(link)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_GWOL: {
+ struct ethtool_wolinfo wol = {ETHTOOL_GWOL};
+ e1000_ethtool_gwol(adapter, &wol);
+ if(copy_to_user(addr, &wol, sizeof(wol)) != 0)
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SWOL: {
+ struct ethtool_wolinfo wol;
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if(copy_from_user(&wol, addr, sizeof(wol)) != 0)
+ return -EFAULT;
+ return e1000_ethtool_swol(adapter, &wol);
+ }
+ case ETHTOOL_GEEPROM: {
+ struct ethtool_eeprom eeprom = {ETHTOOL_GEEPROM};
+ uint16_t *eeprom_buff;
+ void *ptr;
+ int max_len, err = 0;
+
+ max_len = e1000_eeprom_size(&adapter->hw);
+
+ eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+
+ if(eeprom_buff == NULL)
+ return -ENOMEM;
+
+ if(copy_from_user(&eeprom, addr, sizeof(eeprom))) {
+ err = -EFAULT;
+ goto err_geeprom_ioctl;
+ }
+
+ if((err = e1000_ethtool_geeprom(adapter, &eeprom,
+ eeprom_buff)))
+ goto err_geeprom_ioctl;
+
+ if(copy_to_user(addr, &eeprom, sizeof(eeprom))) {
+ err = -EFAULT;
+ goto err_geeprom_ioctl;
+ }
+
+ addr += offsetof(struct ethtool_eeprom, data);
+ ptr = ((void *)eeprom_buff) + (eeprom.offset & 1);
+
+ if(copy_to_user(addr, ptr, eeprom.len))
+ err = -EFAULT;
+
+err_geeprom_ioctl:
+ kfree(eeprom_buff);
+ return err;
+ }
+ case ETHTOOL_SEEPROM: {
+ struct ethtool_eeprom eeprom;
+
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if(copy_from_user(&eeprom, addr, sizeof(eeprom)))
+ return -EFAULT;
+
+ addr += offsetof(struct ethtool_eeprom, data);
+ return e1000_ethtool_seeprom(adapter, &eeprom, addr);
+ }
+ case ETHTOOL_GSTATS: {
+ struct {
+ struct ethtool_stats cmd;
+ uint64_t data[E1000_STATS_LEN];
+ } stats = { {ETHTOOL_GSTATS, E1000_STATS_LEN} };
+ int i;
+
+ for(i = 0; i < E1000_STATS_LEN; i++)
+ stats.data[i] =
+ ((unsigned long *)&adapter->net_stats)[i];
+ if(copy_to_user(addr, &stats, sizeof(stats)))
+ return -EFAULT;
+ return 0;
+ }
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+
diff --git a/xen/drivers/net/e1000/e1000_hw.c b/xen/drivers/net/e1000/e1000_hw.c
new file mode 100644
index 0000000000..1d70dab937
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_hw.c
@@ -0,0 +1,3610 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* e1000_hw.c
+ * Shared functions for accessing and configuring the MAC
+ */
+
+#include "e1000_hw.h"
+
+static int32_t e1000_setup_fiber_link(struct e1000_hw *hw);
+static int32_t e1000_setup_copper_link(struct e1000_hw *hw);
+static int32_t e1000_phy_force_speed_duplex(struct e1000_hw *hw);
+static int32_t e1000_config_mac_to_phy(struct e1000_hw *hw);
+static int32_t e1000_force_mac_fc(struct e1000_hw *hw);
+static void e1000_raise_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl);
+static void e1000_lower_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl);
+static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, uint32_t data, uint16_t count);
+static uint16_t e1000_shift_in_mdi_bits(struct e1000_hw *hw);
+static int32_t e1000_phy_reset_dsp(struct e1000_hw *hw);
+static void e1000_raise_ee_clk(struct e1000_hw *hw, uint32_t *eecd);
+static void e1000_lower_ee_clk(struct e1000_hw *hw, uint32_t *eecd);
+static void e1000_shift_out_ee_bits(struct e1000_hw *hw, uint16_t data, uint16_t count);
+static uint16_t e1000_shift_in_ee_bits(struct e1000_hw *hw);
+static void e1000_setup_eeprom(struct e1000_hw *hw);
+static void e1000_clock_eeprom(struct e1000_hw *hw);
+static void e1000_cleanup_eeprom(struct e1000_hw *hw);
+static void e1000_standby_eeprom(struct e1000_hw *hw);
+static int32_t e1000_id_led_init(struct e1000_hw * hw);
+
+/******************************************************************************
+ * Set the mac type member in the hw struct.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_set_mac_type(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_set_mac_type");
+
+ switch (hw->device_id) {
+ case E1000_DEV_ID_82542:
+ switch (hw->revision_id) {
+ case E1000_82542_2_0_REV_ID:
+ hw->mac_type = e1000_82542_rev2_0;
+ break;
+ case E1000_82542_2_1_REV_ID:
+ hw->mac_type = e1000_82542_rev2_1;
+ break;
+ default:
+ /* Invalid 82542 revision ID */
+ return -E1000_ERR_MAC_TYPE;
+ }
+ break;
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ hw->mac_type = e1000_82543;
+ break;
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ hw->mac_type = e1000_82544;
+ break;
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ hw->mac_type = e1000_82540;
+ break;
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82545EM_FIBER:
+ hw->mac_type = e1000_82545;
+ break;
+ case E1000_DEV_ID_82546EB_COPPER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ hw->mac_type = e1000_82546;
+ break;
+ default:
+ /* Should never have loaded on this device */
+ return -E1000_ERR_MAC_TYPE;
+ }
+ return E1000_SUCCESS;
+}
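+
+/* Note: e1000_set_mac_type() is expected to run before the other shared
+ * code routines, since the reset and init paths below branch on
+ * hw->mac_type.
+ */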
+/******************************************************************************
+ * Reset the transmit and receive units; mask and clear all interrupts.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_reset_hw(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint32_t ctrl_ext;
+ uint32_t icr;
+ uint32_t manc;
+
+ DEBUGFUNC("e1000_reset_hw");
+ /* For 82542 (rev 2.0), disable MWI before issuing a device reset */
+ if(hw->mac_type == e1000_82542_rev2_0) {
+ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n");
+ e1000_pci_clear_mwi(hw);
+ }
+
+ /* Clear interrupt mask to stop board from generating interrupts */
+ DEBUGOUT("Masking off all interrupts\n");
+ E1000_WRITE_REG(hw, IMC, 0xffffffff);
+
+ /* Disable the Transmit and Receive units. Then delay to allow
+ * any pending transactions to complete before we hit the MAC with
+ * the global reset.
+ */
+ E1000_WRITE_REG(hw, RCTL, 0);
+ E1000_WRITE_REG(hw, TCTL, E1000_TCTL_PSP);
+ E1000_WRITE_FLUSH(hw);
+
+ /* The tbi_compatibility_on flag must be cleared when RCTL is cleared. */
+ hw->tbi_compatibility_on = FALSE;
+
+ /* Delay to allow any outstanding PCI transactions to complete before
+ * resetting the device
+ */
+ DEBUGOUT("Before delay\n");
+ msec_delay(10);
+
+ /* Issue a global reset to the MAC. This will reset the chip's
+ * transmit, receive, DMA, and link units. It will not affect
+ * the current PCI configuration. The global reset bit is self-
+ * clearing, and should clear within a microsecond.
+ */
+ DEBUGOUT("Issuing a global reset to MAC\n");
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ if(hw->mac_type > e1000_82543)
+ E1000_WRITE_REG_IO(hw, CTRL, (ctrl | E1000_CTRL_RST));
+ else
+ E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_RST));
+
+ /* Force a reload from the EEPROM if necessary */
+ if(hw->mac_type < e1000_82540) {
+ /* Wait for reset to complete */
+ udelay(10);
+ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_EE_RST;
+ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
+ E1000_WRITE_FLUSH(hw);
+ /* Wait for EEPROM reload */
+ msec_delay(2);
+ } else {
+ /* Wait for EEPROM reload (it happens automatically) */
+ msec_delay(4);
+ /* Disable HW ARPs on ASF-enabled adapters */
+ manc = E1000_READ_REG(hw, MANC);
+ manc &= ~(E1000_MANC_ARP_EN);
+ E1000_WRITE_REG(hw, MANC, manc);
+ }
+
+ /* Clear interrupt mask to stop board from generating interrupts */
+ DEBUGOUT("Masking off all interrupts\n");
+ E1000_WRITE_REG(hw, IMC, 0xffffffff);
+
+ /* Clear any pending interrupt events. */
+ icr = E1000_READ_REG(hw, ICR);
+
+ /* If MWI was previously enabled, reenable it. */
+ if(hw->mac_type == e1000_82542_rev2_0) {
+ if(hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
+ e1000_pci_set_mwi(hw);
+ }
+}
+
+/******************************************************************************
+ * Performs basic configuration of the adapter.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Assumes that the controller has previously been reset and is in a
+ * post-reset uninitialized state. Initializes the receive address registers,
+ * multicast table, and VLAN filter table. Calls routines to setup link
+ * configuration and flow control settings. Clears all on-chip counters. Leaves
+ * the transmit and receive units disabled and uninitialized.
+ *****************************************************************************/
+int32_t
+e1000_init_hw(struct e1000_hw *hw)
+{
+ uint32_t ctrl, status;
+ uint32_t i;
+ int32_t ret_val;
+ uint16_t pcix_cmd_word;
+ uint16_t pcix_stat_hi_word;
+ uint16_t cmd_mmrbc;
+ uint16_t stat_mmrbc;
+
+ DEBUGFUNC("e1000_init_hw");
+
+ /* Initialize Identification LED */
+ ret_val = e1000_id_led_init(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Initializing Identification LED\n");
+ return ret_val;
+ }
+
+ /* Set the Media Type and exit with error if it is not valid. */
+ if(hw->mac_type != e1000_82543) {
+ /* tbi_compatibility is only valid on 82543 */
+ hw->tbi_compatibility_en = FALSE;
+ }
+
+ if(hw->mac_type >= e1000_82543) {
+ status = E1000_READ_REG(hw, STATUS);
+ if(status & E1000_STATUS_TBIMODE) {
+ hw->media_type = e1000_media_type_fiber;
+ /* tbi_compatibility not valid on fiber */
+ hw->tbi_compatibility_en = FALSE;
+ } else {
+ hw->media_type = e1000_media_type_copper;
+ }
+ } else {
+ /* This is an 82542 (fiber only) */
+ hw->media_type = e1000_media_type_fiber;
+ }
+
+ /* Disabling VLAN filtering. */
+ DEBUGOUT("Initializing the IEEE VLAN\n");
+ E1000_WRITE_REG(hw, VET, 0);
+
+ e1000_clear_vfta(hw);
+
+ /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */
+ if(hw->mac_type == e1000_82542_rev2_0) {
+ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n");
+ e1000_pci_clear_mwi(hw);
+ E1000_WRITE_REG(hw, RCTL, E1000_RCTL_RST);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(5);
+ }
+
+ /* Setup the receive address. This involves initializing all of the Receive
+ * Address Registers (RARs 0 - 15).
+ */
+ e1000_init_rx_addrs(hw);
+
+ /* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */
+ if(hw->mac_type == e1000_82542_rev2_0) {
+ E1000_WRITE_REG(hw, RCTL, 0);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(1);
+ if(hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
+ e1000_pci_set_mwi(hw);
+ }
+
+ /* Zero out the Multicast HASH table */
+ DEBUGOUT("Zeroing the MTA\n");
+ for(i = 0; i < E1000_MC_TBL_SIZE; i++)
+ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+
+ /* Set the PCI priority bit correctly in the CTRL register. This
+ * determines if the adapter gives priority to receives, or if it
+ * gives equal priority to transmits and receives.
+ */
+ if(hw->dma_fairness) {
+ ctrl = E1000_READ_REG(hw, CTRL);
+ E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PRIOR);
+ }
+
+ /* Workaround for PCI-X problem when BIOS sets MMRBC incorrectly. */
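+ /* Compare the BIOS-programmed Maximum Memory Read Byte Count (PCI-X
+ * command register) against the maximum the device reports it can
+ * handle (status register, with 4K treated as 2K per the cap below),
+ * and scale the command register back down if it is set too high.
+ */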
+ if(hw->bus_type == e1000_bus_type_pcix) {
+ e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd_word);
+ e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, &pcix_stat_hi_word);
+ cmd_mmrbc = (pcix_cmd_word & PCIX_COMMAND_MMRBC_MASK) >>
+ PCIX_COMMAND_MMRBC_SHIFT;
+ stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >>
+ PCIX_STATUS_HI_MMRBC_SHIFT;
+ if(stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K)
+ stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K;
+ if(cmd_mmrbc > stat_mmrbc) {
+ pcix_cmd_word &= ~PCIX_COMMAND_MMRBC_MASK;
+ pcix_cmd_word |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT;
+ e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd_word);
+ }
+ }
+
+ /* Call a subroutine to configure the link and setup flow control. */
+ ret_val = e1000_setup_link(hw);
+
+ /* Set the transmit descriptor write-back policy */
+ if(hw->mac_type > e1000_82544) {
+ ctrl = E1000_READ_REG(hw, TXDCTL);
+ ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB;
+ E1000_WRITE_REG(hw, TXDCTL, ctrl);
+ }
+
+ /* Clear all of the statistics registers (clear on read). It is
+ * important that we do this after we have tried to establish link
+ * because the symbol error count will increment wildly if there
+ * is no link.
+ */
+ e1000_clear_hw_cntrs(hw);
+
+ return ret_val;
+}
+
+/******************************************************************************
+ * Configures flow control and link settings.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Determines which flow control settings to use. Calls the appropriate media-
+ * specific link configuration function. Configures the flow control settings.
+ * Assuming the adapter has a valid link partner, a valid link should be
+ * established. Assumes the hardware has previously been reset and the
+ * transmitter and receiver are not enabled.
+ *****************************************************************************/
+int32_t
+e1000_setup_link(struct e1000_hw *hw)
+{
+ uint32_t ctrl_ext;
+ int32_t ret_val;
+ uint16_t eeprom_data;
+
+ DEBUGFUNC("e1000_setup_link");
+
+ /* Read and store word 0x0F of the EEPROM. This word contains bits
+ * that determine the hardware's default PAUSE (flow control) mode,
+ * a bit that determines whether the HW defaults to enabling or
+ * disabling auto-negotiation, and the direction of the
+ * SW defined pins. If there is no SW over-ride of the flow
+ * control setting, then the variable hw->fc will
+ * be initialized based on a value in the EEPROM.
+ */
+ if(e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+
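+ /* The word 0x0F PAUSE bits decode as: neither bit set = no flow
+ * control, ASM_DIR alone = Tx PAUSE only, any other combination =
+ * full flow control.
+ */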
+ if(hw->fc == e1000_fc_default) {
+ if((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == 0)
+ hw->fc = e1000_fc_none;
+ else if((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) ==
+ EEPROM_WORD0F_ASM_DIR)
+ hw->fc = e1000_fc_tx_pause;
+ else
+ hw->fc = e1000_fc_full;
+ }
+
+ /* We want to save off the original Flow Control configuration just
+ * in case we get disconnected and then reconnected into a different
+ * hub or switch with different Flow Control capabilities.
+ */
+ if(hw->mac_type == e1000_82542_rev2_0)
+ hw->fc &= (~e1000_fc_tx_pause);
+
+ if((hw->mac_type < e1000_82543) && (hw->report_tx_early == 1))
+ hw->fc &= (~e1000_fc_rx_pause);
+
+ hw->original_fc = hw->fc;
+
+ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc);
+
+ /* Take the 4 bits from EEPROM word 0x0F that determine the initial
+ * polarity value for the SW controlled pins, and setup the
+ * Extended Device Control reg with that info.
+ * This is needed because one of the SW controlled pins is used for
+ * signal detection. So this should be done before e1000_setup_pcs_link()
+ * or e1000_phy_setup() is called.
+ */
+ if(hw->mac_type == e1000_82543) {
+ ctrl_ext = ((eeprom_data & EEPROM_WORD0F_SWPDIO_EXT) <<
+ SWDPIO__EXT_SHIFT);
+ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
+ }
+
+ /* Call the necessary subroutine to configure the link. */
+ ret_val = (hw->media_type == e1000_media_type_fiber) ?
+ e1000_setup_fiber_link(hw) :
+ e1000_setup_copper_link(hw);
+
+ /* Initialize the flow control address, type, and PAUSE timer
+ * registers to their default values. This is done even if flow
+ * control is disabled, because it does not hurt anything to
+ * initialize these registers.
+ */
+ DEBUGOUT("Initializing the Flow Control address, type and timer regs\n");
+
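+ /* These constants program the standard IEEE 802.3x PAUSE parameters:
+ * the reserved multicast destination address 01:80:C2:00:00:01 and
+ * the MAC control Ethertype 0x8808.
+ */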
+ E1000_WRITE_REG(hw, FCAL, FLOW_CONTROL_ADDRESS_LOW);
+ E1000_WRITE_REG(hw, FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+ E1000_WRITE_REG(hw, FCT, FLOW_CONTROL_TYPE);
+ E1000_WRITE_REG(hw, FCTTV, hw->fc_pause_time);
+
+ /* Set the flow control receive threshold registers. Normally,
+ * these registers will be set to a default threshold that may be
+ * adjusted later by the driver's runtime code. However, if the
+ * ability to transmit pause frames is not enabled, then these
+ * registers will be set to 0.
+ */
+ if(!(hw->fc & e1000_fc_tx_pause)) {
+ E1000_WRITE_REG(hw, FCRTL, 0);
+ E1000_WRITE_REG(hw, FCRTH, 0);
+ } else {
+ /* We need to set up the Receive Threshold high and low water marks
+ * as well as (optionally) enabling the transmission of XON frames.
+ */
+ if(hw->fc_send_xon) {
+ E1000_WRITE_REG(hw, FCRTL, (hw->fc_low_water | E1000_FCRTL_XONE));
+ E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water);
+ } else {
+ E1000_WRITE_REG(hw, FCRTL, hw->fc_low_water);
+ E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water);
+ }
+ }
+ return ret_val;
+}
+
+/******************************************************************************
+ * Sets up link for a fiber based adapter
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Manipulates Physical Coding Sublayer functions in order to configure
+ * link. Assumes the hardware has been previously reset and the transmitter
+ * and receiver are not enabled.
+ *****************************************************************************/
+static int32_t
+e1000_setup_fiber_link(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint32_t status;
+ uint32_t txcw = 0;
+ uint32_t i;
+ uint32_t signal;
+ int32_t ret_val;
+
+ DEBUGFUNC("e1000_setup_fiber_link");
+
+ /* On adapters with a MAC newer than 82544, SW Definable pin 1 will be
+ * set when the optics detect a signal. On older adapters, it will be
+ * cleared when there is a signal.
+ */
+ ctrl = E1000_READ_REG(hw, CTRL);
+ if(hw->mac_type > e1000_82544) signal = E1000_CTRL_SWDPIN1;
+ else signal = 0;
+
+ /* Take the link out of reset */
+ ctrl &= ~(E1000_CTRL_LRST);
+
+ e1000_config_collision_dist(hw);
+
+ /* Check for a software override of the flow control settings, and setup
+ * the device accordingly. If auto-negotiation is enabled, then software
+ * will have to set the "PAUSE" bits to the correct value in the Tranmsit
+ * Config Word Register (TXCW) and re-start auto-negotiation. However, if
+ * auto-negotiation is disabled, then software will have to manually
+ * configure the two flow control enable bits in the CTRL register.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames, but
+ * not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames but we do
+ * not support receiving pause frames).
+ * 3: Both Rx and TX flow control (symmetric) are enabled.
+ */
+ switch (hw->fc) {
+ case e1000_fc_none:
+ /* Flow control is completely disabled by a software over-ride. */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD);
+ break;
+ case e1000_fc_rx_pause:
+ /* RX Flow control is enabled and TX Flow control is disabled by a
+ * software over-ride. Since there really isn't a way to advertise
+ * that we are capable of RX Pause ONLY, we will advertise that we
+ * support both symmetric and asymmetric RX PAUSE. Later, we will
+ * disable the adapter's ability to send PAUSE frames.
+ */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+ break;
+ case e1000_fc_tx_pause:
+ /* TX Flow control is enabled, and RX Flow control is disabled, by a
+ * software over-ride.
+ */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR);
+ break;
+ case e1000_fc_full:
+ /* Flow control (both RX and TX) is enabled by a software over-ride. */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+ break;
+ default:
+ DEBUGOUT("Flow control param set incorrectly\n");
+ return -E1000_ERR_CONFIG;
+ break;
+ }
+
+ /* Since auto-negotiation is enabled, take the link out of reset (the link
+ * will be in reset, because we previously reset the chip). This will
+ * restart auto-negotiation. If auto-negotiation is successful then the
+ * link-up status bit will be set and the flow control enable bits (RFCE
+ * and TFCE) will be set according to their negotiated value.
+ */
+ DEBUGOUT("Auto-negotiation enabled\n");
+
+ E1000_WRITE_REG(hw, TXCW, txcw);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+
+ hw->txcw = txcw;
+ msec_delay(1);
+
+ /* If we have a signal (the cable is plugged in) then poll for a "Link-Up"
+ * indication in the Device Status Register. Time-out if a link isn't
+ * seen in 500 milliseconds (Auto-negotiation should complete in
+ * less than 500 milliseconds even if the other end is doing it in SW).
+ */
+ if((E1000_READ_REG(hw, CTRL) & E1000_CTRL_SWDPIN1) == signal) {
+ DEBUGOUT("Looking for Link\n");
+ for(i = 0; i < (LINK_UP_TIMEOUT / 10); i++) {
+ msec_delay(10);
+ status = E1000_READ_REG(hw, STATUS);
+ if(status & E1000_STATUS_LU) break;
+ }
+ if(i == (LINK_UP_TIMEOUT / 10)) {
+ /* AutoNeg failed to achieve a link, so we'll call
+ * e1000_check_for_link. This routine will force the link up if we
+ * detect a signal. This will allow us to communicate with
+ * non-autonegotiating link partners.
+ */
+ DEBUGOUT("Never got a valid link from auto-neg!!!\n");
+ hw->autoneg_failed = 1;
+ ret_val = e1000_check_for_link(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error while checking for link\n");
+ return ret_val;
+ }
+ hw->autoneg_failed = 0;
+ } else {
+ hw->autoneg_failed = 0;
+ DEBUGOUT("Valid Link Found\n");
+ }
+ } else {
+ DEBUGOUT("No Signal Detected\n");
+ }
+ return 0;
+}
+
+/******************************************************************************
+* Detects which PHY is present and configures the copper link, speed, and duplex
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+static int32_t
+e1000_setup_copper_link(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ int32_t ret_val;
+ uint16_t i;
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_setup_copper_link");
+
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* With 82543, we need to force speed and duplex on the MAC equal to what
+ * the PHY speed and duplex configuration is. In addition, we need to
+ * perform a hardware reset on the PHY to take it out of reset.
+ */
+ if(hw->mac_type > e1000_82543) {
+ ctrl |= E1000_CTRL_SLU;
+ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ } else {
+ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | E1000_CTRL_SLU);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ e1000_phy_hw_reset(hw);
+ }
+
+ /* Make sure we have a valid PHY */
+ ret_val = e1000_detect_gig_phy(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error, did not detect valid phy.\n");
+ return ret_val;
+ }
+ DEBUGOUT1("Phy ID = %x \n", hw->phy_id);
+
+ /* Enable CRS on TX. This must be set for half-duplex operation. */
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+
+ /* Options:
+ * MDI/MDI-X = 0 (default)
+ * 0 - Auto for all speeds
+ * 1 - MDI mode
+ * 2 - MDI-X mode
+ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+ */
+ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+ switch (hw->mdix) {
+ case 1:
+ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+ break;
+ case 2:
+ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+ break;
+ case 3:
+ phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+ break;
+ case 0:
+ default:
+ phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+ break;
+ }
+
+ /* Options:
+ * disable_polarity_correction = 0 (default)
+ * Automatic Correction for Reversed Cable Polarity
+ * 0 - Disabled
+ * 1 - Enabled
+ */
+ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+ if(hw->disable_polarity_correction == 1)
+ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+ if(e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Force TX_CLK in the Extended PHY Specific Control Register
+ * to 25MHz clock.
+ */
+ if(e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= M88E1000_EPSCR_TX_CLK_25;
+
+ if (hw->phy_revision < M88E1011_I_REV_4) {
+ /* Configure Master and Slave downshift values */
+ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
+ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
+ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
+ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
+ if(e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ }
+
+ /* SW Reset the PHY so all changes take effect */
+ ret_val = e1000_phy_reset(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Resetting the PHY\n");
+ return ret_val;
+ }
+
+ /* Options:
+ * autoneg = 1 (default)
+ * PHY will advertise value(s) parsed from
+ * autoneg_advertised and fc
+ * autoneg = 0
+ * PHY will be set to 10H, 10F, 100H, or 100F
+ * depending on value parsed from forced_speed_duplex.
+ */
+
+ /* Is autoneg enabled? This is enabled by default or by software override.
+ * If so, call e1000_phy_setup_autoneg routine to parse the
+ * autoneg_advertised and fc options. If autoneg is NOT enabled, then the
+ * user should have provided a speed/duplex override. If so, then call
+ * e1000_phy_force_speed_duplex to parse and set this up.
+ */
+ if(hw->autoneg) {
+ /* Perform some bounds checking on the hw->autoneg_advertised
+ * parameter. If this variable is zero, then set it to the default.
+ */
+ hw->autoneg_advertised &= AUTONEG_ADVERTISE_SPEED_DEFAULT;
+
+ /* If autoneg_advertised is zero, we assume it was not defaulted
+ * by the calling code so we set to advertise full capability.
+ */
+ if(hw->autoneg_advertised == 0)
+ hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+
+ DEBUGOUT("Reconfiguring auto-neg advertisement params\n");
+ ret_val = e1000_phy_setup_autoneg(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Setting up Auto-Negotiation\n");
+ return ret_val;
+ }
+ DEBUGOUT("Restarting Auto-Neg\n");
+
+ /* Restart auto-negotiation by setting the Auto Neg Enable bit and
+ * the Auto Neg Restart bit in the PHY control register.
+ */
+ if(e1000_read_phy_reg(hw, PHY_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+ if(e1000_write_phy_reg(hw, PHY_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Does the user want to wait for Auto-Neg to complete here, or
+ * check at a later time (for example, callback routine).
+ */
+ if(hw->wait_autoneg_complete) {
+ ret_val = e1000_wait_autoneg(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error while waiting for autoneg to complete\n");
+ return ret_val;
+ }
+ }
+ } else {
+ DEBUGOUT("Forcing speed and duplex\n");
+ ret_val = e1000_phy_force_speed_duplex(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Forcing Speed and Duplex\n");
+ return ret_val;
+ }
+ }
+
+ /* Check link status. Wait up to 100 microseconds for link to become
+ * valid.
+ */
+ for(i = 0; i < 10; i++) {
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(phy_data & MII_SR_LINK_STATUS) {
+ /* We have link, so we need to finish the config process:
+ * 1) Set up the MAC to the current PHY speed/duplex
+ * if we are on 82543. If we
+ * are on newer silicon, we only need to configure
+ * collision distance in the Transmit Control Register.
+ * 2) Set up flow control on the MAC to that established with
+ * the link partner.
+ */
+ if(hw->mac_type >= e1000_82544) {
+ e1000_config_collision_dist(hw);
+ } else {
+ ret_val = e1000_config_mac_to_phy(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error configuring MAC to PHY settings\n");
+ return ret_val;
+ }
+ }
+ ret_val = e1000_config_fc_after_link_up(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Configuring Flow Control\n");
+ return ret_val;
+ }
+ DEBUGOUT("Valid link established!!!\n");
+ return 0;
+ }
+ udelay(10);
+ }
+
+ DEBUGOUT("Unable to establish link!!!\n");
+ return 0;
+}
+
+/******************************************************************************
+* Configures PHY autoneg and flow control advertisement settings
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+int32_t
+e1000_phy_setup_autoneg(struct e1000_hw *hw)
+{
+ uint16_t mii_autoneg_adv_reg;
+ uint16_t mii_1000t_ctrl_reg;
+
+ DEBUGFUNC("e1000_phy_setup_autoneg");
+
+ /* Read the MII Auto-Neg Advertisement Register (Address 4). */
+ if(e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Read the MII 1000Base-T Control Register (Address 9). */
+ if(e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Need to parse both autoneg_advertised and fc and set up
+ * the appropriate PHY registers. First we will parse for
+ * autoneg_advertised software override. Since we can advertise
+ * a plethora of combinations, we need to check each bit
+ * individually.
+ */
+
+ /* First we clear all the 10/100 mb speed bits in the Auto-Neg
+ * Advertisement Register (Address 4) and the 1000 mb speed bits in
+ * the 1000Base-T Control Register (Address 9).
+ */
+ mii_autoneg_adv_reg &= ~REG4_SPEED_MASK;
+ mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK;
+
+ DEBUGOUT1("autoneg_advertised %x\n", hw->autoneg_advertised);
+
+ /* Do we want to advertise 10 Mb Half Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_10_HALF) {
+ DEBUGOUT("Advertise 10mb Half duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+ }
+
+ /* Do we want to advertise 10 Mb Full Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_10_FULL) {
+ DEBUGOUT("Advertise 10mb Full duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
+ }
+
+ /* Do we want to advertise 100 Mb Half Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_100_HALF) {
+ DEBUGOUT("Advertise 100mb Half duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
+ }
+
+ /* Do we want to advertise 100 Mb Full Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_100_FULL) {
+ DEBUGOUT("Advertise 100mb Full duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
+ }
+
+ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+ if(hw->autoneg_advertised & ADVERTISE_1000_HALF) {
+ DEBUGOUT("Advertise 1000mb Half duplex requested, request denied!\n");
+ }
+
+ /* Do we want to advertise 1000 Mb Full Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_1000_FULL) {
+ DEBUGOUT("Advertise 1000mb Full duplex\n");
+ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
+ }
+
+ /* Check for a software override of the flow control settings, and
+ * setup the PHY advertisement registers accordingly. If
+ * auto-negotiation is enabled, then software will have to set the
+ * "PAUSE" bits to the correct value in the Auto-Negotiation
+ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-negotiation.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames
+ * but we do not support receiving pause frames).
+ * 3: Both Rx and TX flow control (symmetric) are enabled.
+ * other: No software override. The flow control configuration
+ * in the EEPROM is used.
+ */
+ switch (hw->fc) {
+ case e1000_fc_none: /* 0 */
+ /* Flow control (RX & TX) is completely disabled by a
+ * software over-ride.
+ */
+ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ case e1000_fc_rx_pause: /* 1 */
+ /* RX Flow control is enabled, and TX Flow control is
+ * disabled, by a software over-ride.
+ */
+ /* Since there really isn't a way to advertise that we are
+ * capable of RX Pause ONLY, we will advertise that we
+ * support both symmetric and asymmetric RX PAUSE. Later
+ * (in e1000_config_fc_after_link_up) we will disable the
+ * hw's ability to send PAUSE frames.
+ */
+ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ case e1000_fc_tx_pause: /* 2 */
+ /* TX Flow control is enabled, and RX Flow control is
+ * disabled, by a software over-ride.
+ */
+ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
+ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
+ break;
+ case e1000_fc_full: /* 3 */
+ /* Flow control (both RX and TX) is enabled by a software
+ * over-ride.
+ */
+ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ default:
+ DEBUGOUT("Flow control param set incorrectly\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ if(e1000_write_phy_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+ if(e1000_write_phy_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ return 0;
+}
+
+/******************************************************************************
+* Force PHY speed and duplex settings to hw->forced_speed_duplex
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+static int32_t
+e1000_phy_force_speed_duplex(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ int32_t ret_val;
+ uint16_t mii_ctrl_reg;
+ uint16_t mii_status_reg;
+ uint16_t phy_data;
+ uint16_t i;
+
+ DEBUGFUNC("e1000_phy_force_speed_duplex");
+
+ /* Turn off Flow control if we are forcing speed and duplex. */
+ hw->fc = e1000_fc_none;
+
+ DEBUGOUT1("hw->fc = %d\n", hw->fc);
+
+ /* Read the Device Control Register. */
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ /* Set the bits to Force Speed and Duplex in the Device Ctrl Reg. */
+ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+ ctrl &= ~(DEVICE_SPEED_MASK);
+
+ /* Clear the Auto Speed Detect Enable bit. */
+ ctrl &= ~E1000_CTRL_ASDE;
+
+ /* Read the MII Control Register. */
+ if(e1000_read_phy_reg(hw, PHY_CTRL, &mii_ctrl_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* We need to disable autoneg in order to force link and duplex. */
+
+ mii_ctrl_reg &= ~MII_CR_AUTO_NEG_EN;
+
+ /* Are we forcing Full or Half Duplex? */
+ if(hw->forced_speed_duplex == e1000_100_full ||
+ hw->forced_speed_duplex == e1000_10_full) {
+ /* We want to force full duplex so we SET the full duplex bits in the
+ * Device and MII Control Registers.
+ */
+ ctrl |= E1000_CTRL_FD;
+ mii_ctrl_reg |= MII_CR_FULL_DUPLEX;
+ DEBUGOUT("Full Duplex\n");
+ } else {
+ /* We want to force half duplex so we CLEAR the full duplex bits in
+ * the Device and MII Control Registers.
+ */
+ ctrl &= ~E1000_CTRL_FD;
+ mii_ctrl_reg &= ~MII_CR_FULL_DUPLEX;
+ DEBUGOUT("Half Duplex\n");
+ }
+
+ /* Are we forcing 100Mbps??? */
+ if(hw->forced_speed_duplex == e1000_100_full ||
+ hw->forced_speed_duplex == e1000_100_half) {
+ /* Set the 100Mb bit and turn off the 1000Mb and 10Mb bits. */
+ ctrl |= E1000_CTRL_SPD_100;
+ mii_ctrl_reg |= MII_CR_SPEED_100;
+ mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10);
+ DEBUGOUT("Forcing 100mb ");
+ } else {
+ /* Set the 10Mb bit and turn off the 1000Mb and 100Mb bits. */
+ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
+ mii_ctrl_reg |= MII_CR_SPEED_10;
+ mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100);
+ DEBUGOUT("Forcing 10mb ");
+ }
+
+ e1000_config_collision_dist(hw);
+
+ /* Write the configured values back to the Device Control Reg. */
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI
+ * forced whenever speed and duplex are forced.
+ */
+ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+ if(e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ DEBUGOUT1("M88E1000 PSCR: %x \n", phy_data);
+
+ /* Need to reset the PHY or these changes will be ignored */
+ mii_ctrl_reg |= MII_CR_RESET;
+
+ /* Write back the modified PHY MII control register. */
+ if(e1000_write_phy_reg(hw, PHY_CTRL, mii_ctrl_reg) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ udelay(1);
+
+ /* The wait_autoneg_complete flag may be a little misleading here.
+ * Since we are forcing speed and duplex, Auto-Neg is not enabled.
+ * But we do want to delay for a period while forcing only so we
+ * don't generate false No Link messages. So we will wait here
+ * only if the user has set wait_autoneg_complete to 1, which is
+ * the default.
+ */
+ if(hw->wait_autoneg_complete) {
+ /* We will wait for autoneg to complete. */
+ DEBUGOUT("Waiting for forced speed/duplex link.\n");
+ mii_status_reg = 0;
+
+ /* We will wait for autoneg to complete or 4.5 seconds to expire. */
+ for(i = PHY_FORCE_TIME; i > 0; i--) {
+ /* Read the MII Status Register and wait for Auto-Neg Complete bit
+ * to be set.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(mii_status_reg & MII_SR_LINK_STATUS) break;
+ msec_delay(100);
+ }
+ if(i == 0) { /* We didn't get link */
+ /* Reset the DSP and wait again for link. */
+
+ ret_val = e1000_phy_reset_dsp(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Resetting PHY DSP\n");
+ return ret_val;
+ }
+ }
+ /* This loop will early-out if the link condition has been met. */
+ for(i = PHY_FORCE_TIME; i > 0; i--) {
+ if(mii_status_reg & MII_SR_LINK_STATUS) break;
+ msec_delay(100);
+ /* Read the MII Status Register and wait for Auto-Neg Complete bit
+ * to be set.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ }
+ }
+
+ /* Because we reset the PHY above, we need to re-force TX_CLK in the
+ * Extended PHY Specific Control Register to 25MHz clock. This value
+ * defaults back to a 2.5MHz clock when the PHY is reset.
+ */
+ if(e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= M88E1000_EPSCR_TX_CLK_25;
+ if(e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* In addition, because of the s/w reset above, we need to enable CRS on
+ * TX. This must be set for both full and half duplex operation.
+ */
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+ if(e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ return 0;
+}
+
+/******************************************************************************
+* Sets the collision distance in the Transmit Control register
+*
+* hw - Struct containing variables accessed by shared code
+*
+* Link should have been established previously. Programs a fixed
+* collision distance into the Transmit Control register.
+******************************************************************************/
+void
+e1000_config_collision_dist(struct e1000_hw *hw)
+{
+ uint32_t tctl;
+
+ tctl = E1000_READ_REG(hw, TCTL);
+
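+ /* Clear the COLD (collision distance) field, then shift the fixed
+ * collision-distance constant into position.
+ */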
+ tctl &= ~E1000_TCTL_COLD;
+ tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT;
+
+ E1000_WRITE_REG(hw, TCTL, tctl);
+ E1000_WRITE_FLUSH(hw);
+}
+
+/******************************************************************************
+* Sets MAC speed and duplex settings to reflect those in the PHY
+*
+* hw - Struct containing variables accessed by shared code
+*
+* Reads the current speed and duplex from the M88E1000 PHY Specific
+* Status register and forces the MAC to match them.
+******************************************************************************/
+static int32_t
+e1000_config_mac_to_phy(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_config_mac_to_phy");
+
+ /* Read the Device Control Register and set the bits to Force Speed
+ * and Duplex.
+ */
+ ctrl = E1000_READ_REG(hw, CTRL);
+ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+ ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS);
+
+ /* Set up duplex in the Device Control and Transmit Control
+ * registers depending on negotiated values.
+ */
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(phy_data & M88E1000_PSSR_DPLX) ctrl |= E1000_CTRL_FD;
+ else ctrl &= ~E1000_CTRL_FD;
+
+ e1000_config_collision_dist(hw);
+
+ /* Set up speed in the Device Control register depending on
+ * negotiated values.
+ */
+ if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS)
+ ctrl |= E1000_CTRL_SPD_1000;
+ else if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS)
+ ctrl |= E1000_CTRL_SPD_100;
+ /* Write the configured values back to the Device Control Reg. */
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ return 0;
+}
+
+/******************************************************************************
+ * Forces the MAC's flow control settings.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Sets the TFCE and RFCE bits in the device control register to reflect
+ * the adapter settings. TFCE and RFCE need to be explicitly set by
+ * software when a Copper PHY is used because autonegotiation is managed
+ * by the PHY rather than the MAC. Software must also configure these
+ * bits when link is forced on a fiber connection.
+ *****************************************************************************/
+static int32_t
+e1000_force_mac_fc(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+
+ DEBUGFUNC("e1000_force_mac_fc");
+
+ /* Get the current configuration of the Device Control Register */
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ /* Because we didn't get link via the internal auto-negotiation
+ * mechanism (we either forced link or we got link via PHY
+ * auto-neg), we have to manually enable/disable transmit and
+ * receive flow control.
+ *
+ * The "Case" statement below enables/disable flow control
+ * according to the "hw->fc" parameter.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause
+ * frames but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames
+ * but we do not support receiving pause frames).
+ * 3: Both Rx and TX flow control (symmetric) are enabled.
+ * other: No other values should be possible at this point.
+ */
+
+ switch (hw->fc) {
+ case e1000_fc_none:
+ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE));
+ break;
+ case e1000_fc_rx_pause:
+ ctrl &= (~E1000_CTRL_TFCE);
+ ctrl |= E1000_CTRL_RFCE;
+ break;
+ case e1000_fc_tx_pause:
+ ctrl &= (~E1000_CTRL_RFCE);
+ ctrl |= E1000_CTRL_TFCE;
+ break;
+ case e1000_fc_full:
+ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE);
+ break;
+ default:
+ DEBUGOUT("Flow control param set incorrectly\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ /* Disable TX Flow Control for 82542 (rev 2.0) */
+ if(hw->mac_type == e1000_82542_rev2_0)
+ ctrl &= (~E1000_CTRL_TFCE);
+
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ return 0;
+}
+
+/******************************************************************************
+ * Configures flow control settings after link is established
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Should be called immediately after a valid link has been established.
+ * Forces MAC flow control settings if link was forced. When in MII/GMII mode
+ * and autonegotiation is enabled, the MAC flow control settings will be set
+ * based on the flow control negotiated by the PHY. In TBI mode, the TFCE
+ * and RFCE bits will be automatically set to the negotiated flow control mode.
+ *****************************************************************************/
+int32_t
+e1000_config_fc_after_link_up(struct e1000_hw *hw)
+{
+ int32_t ret_val;
+ uint16_t mii_status_reg;
+ uint16_t mii_nway_adv_reg;
+ uint16_t mii_nway_lp_ability_reg;
+ uint16_t speed;
+ uint16_t duplex;
+
+ DEBUGFUNC("e1000_config_fc_after_link_up");
+
+ /* Check for the case where we have fiber media and auto-neg failed
+ * so we had to force link. In this case, we need to force the
+ * configuration of the MAC to match the "fc" parameter.
+ */
+ if(((hw->media_type == e1000_media_type_fiber) && (hw->autoneg_failed)) ||
+ ((hw->media_type == e1000_media_type_copper) && (!hw->autoneg))) {
+ ret_val = e1000_force_mac_fc(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error forcing flow control settings\n");
+ return ret_val;
+ }
+ }
+
+ /* Check for the case where we have copper media and auto-neg is
+ * enabled. In this case, we need to check and see if Auto-Neg
+ * has completed, and if so, how the PHY and link partner has
+ * flow control configured.
+ */
+ if((hw->media_type == e1000_media_type_copper) && hw->autoneg) {
+ /* Read the MII Status Register and check to see if AutoNeg
+ * has completed. We read this twice because this reg has
+ * some "sticky" (latched) bits.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error \n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error \n");
+ return -E1000_ERR_PHY;
+ }
+
+ if(mii_status_reg & MII_SR_AUTONEG_COMPLETE) {
+ /* The AutoNeg process has completed, so we now need to
+ * read both the Auto Negotiation Advertisement Register
+ * (Address 4) and the Auto_Negotiation Base Page Ability
+ * Register (Address 5) to determine how flow control was
+ * negotiated.
+ */
+ if(e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_nway_adv_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Two bits in the Auto Negotiation Advertisement Register
+ * (Address 4) and two bits in the Auto Negotiation Base
+ * Page Ability Register (Address 5) determine flow control
+ * for both the PHY and the link partner. The following
+ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+ * 1999, describes these PAUSE resolution bits and how flow
+ * control is determined based upon these settings.
+ * NOTE: DC = Don't Care
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+ *-------|---------|-------|---------|--------------------
+ * 0 | 0 | DC | DC | e1000_fc_none
+ * 0 | 1 | 0 | DC | e1000_fc_none
+ * 0 | 1 | 1 | 0 | e1000_fc_none
+ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
+ * 1 | 0 | 0 | DC | e1000_fc_none
+ * 1 | DC | 1 | DC | e1000_fc_full
+ * 1 | 1 | 0 | 0 | e1000_fc_none
+ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
+ *
+ */
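+ /* Worked example: if we advertised PAUSE=1/ASM_DIR=1 and the link
+ * partner advertised PAUSE=0/ASM_DIR=1, the table resolves to
+ * e1000_fc_rx_pause: we honor PAUSE frames from the partner but do
+ * not send any ourselves.
+ */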
+ /* Are both PAUSE bits set to 1? If so, this implies
+ * Symmetric Flow Control is enabled at both ends. The
+ * ASM_DIR bits are irrelevant per the spec.
+ *
+ * For Symmetric Flow Control:
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | DC | 1 | DC | e1000_fc_full
+ *
+ */
+ if((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
+ /* Now we need to check if the user selected RX ONLY
+ * of pause frames. In this case, we had to advertise
+ * FULL flow control because we could not advertise RX
+ * ONLY. Hence, we must now check to see if we need to
+ * turn OFF the TRANSMISSION of PAUSE frames.
+ */
+ if(hw->original_fc == e1000_fc_full) {
+ hw->fc = e1000_fc_full;
+ DEBUGOUT("Flow Control = FULL.\r\n");
+ } else {
+ hw->fc = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n");
+ }
+ }
+ /* For receiving PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
+ *
+ */
+ else if(!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+ hw->fc = e1000_fc_tx_pause;
+ DEBUGOUT("Flow Control = TX PAUSE frames only.\r\n");
+ }
+ /* For transmitting PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
+ *
+ */
+ else if((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+ hw->fc = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n");
+ }
+ /* Per the IEEE spec, at this point flow control should be
+ * disabled. However, we want to consider that we could
+ * be connected to a legacy switch that doesn't advertise
+ * desired flow control, but can be forced on the link
+ * partner. So if we advertised no flow control, that is
+ * what we will resolve to. If we advertised some kind of
+ * receive capability (Rx Pause Only or Full Flow Control)
+ * and the link partner advertised none, we will configure
+ * ourselves to enable Rx Flow Control only. We can do
+ * this safely for two reasons: If the link partner really
+ * didn't want flow control enabled, and we enable Rx, no
+ * harm done since we won't be receiving any PAUSE frames
+ * anyway. If the intent on the link partner was to have
+ * flow control enabled, then by us enabling RX only, we
+ * can at least receive pause frames and process them.
+ * This is a good idea because in most cases, since we are
+ * predominantly a server NIC, more times than not we will
+ * be asked to delay transmission of packets than asking
+ * our link partner to pause transmission of frames.
+ */
+ else if(hw->original_fc == e1000_fc_none ||
+ hw->original_fc == e1000_fc_tx_pause) {
+ hw->fc = e1000_fc_none;
+ DEBUGOUT("Flow Control = NONE.\r\n");
+ } else {
+ hw->fc = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n");
+ }
+
+ /* Now we need to do one last check... If we auto-
+ * negotiated to HALF DUPLEX, flow control should not be
+ * enabled per IEEE 802.3 spec.
+ */
+ e1000_get_speed_and_duplex(hw, &speed, &duplex);
+
+ if(duplex == HALF_DUPLEX)
+ hw->fc = e1000_fc_none;
+
+ /* Now we call a subroutine to actually force the MAC
+ * controller to use the correct flow control settings.
+ */
+ ret_val = e1000_force_mac_fc(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error forcing flow control settings\n");
+ return ret_val;
+ }
+ } else {
+ DEBUGOUT("Copper PHY and Auto Neg has not completed.\r\n");
+ }
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Checks to see if the link status of the hardware has changed.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Called by any function that needs to check the link status of the adapter.
+ *****************************************************************************/
+int32_t
+e1000_check_for_link(struct e1000_hw *hw)
+{
+ uint32_t rxcw;
+ uint32_t ctrl;
+ uint32_t status;
+ uint32_t rctl;
+ uint32_t signal;
+ int32_t ret_val;
+ uint16_t phy_data;
+ uint16_t lp_capability;
+
+ DEBUGFUNC("e1000_check_for_link");
+
+ /* On adapters with a MAC newer than 82544, SW Definable pin 1 will be
+ * set when the optics detect a signal. On older adapters, it will be
+ * cleared when there is a signal.
+ */
+ if(hw->mac_type > e1000_82544) signal = E1000_CTRL_SWDPIN1;
+ else signal = 0;
+
+ ctrl = E1000_READ_REG(hw, CTRL);
+ status = E1000_READ_REG(hw, STATUS);
+ rxcw = E1000_READ_REG(hw, RXCW);
+
+ /* If we have a copper PHY then we only want to go out to the PHY
+ * registers to see if Auto-Neg has completed and/or if our link
+ * status has changed. The get_link_status flag will be set if we
+ * receive a Link Status Change interrupt or we have Rx Sequence
+ * Errors.
+ */
+ if((hw->media_type == e1000_media_type_copper) && hw->get_link_status) {
+ /* First we want to see if the MII Status Register reports
+ * link. If so, then we want to get the current speed/duplex
+ * of the PHY.
+ * Read the register twice since the link bit is sticky.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ if(phy_data & MII_SR_LINK_STATUS) {
+ hw->get_link_status = FALSE;
+ } else {
+ /* No link detected */
+ return 0;
+ }
+
+ /* If we are forcing speed/duplex, then we simply return since
+ * we have already determined whether we have link or not.
+ */
+ if(!hw->autoneg) return -E1000_ERR_CONFIG;
+
+ /* We have a M88E1000 PHY and Auto-Neg is enabled. If we
+ * have Si on board that is 82544 or newer, Auto
+ * Speed Detection takes care of MAC speed/duplex
+ * configuration. So we only need to configure Collision
+ * Distance in the MAC. Otherwise, we need to force
+ * speed/duplex on the MAC to the current PHY speed/duplex
+ * settings.
+ */
+ if(hw->mac_type >= e1000_82544)
+ e1000_config_collision_dist(hw);
+ else {
+ ret_val = e1000_config_mac_to_phy(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error configuring MAC to PHY settings\n");
+ return ret_val;
+ }
+ }
+
+ /* Configure Flow Control now that Auto-Neg has completed. First, we
+ * need to restore the desired flow control settings because we may
+ * have had to re-autoneg with a different link partner.
+ */
+ ret_val = e1000_config_fc_after_link_up(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error configuring flow control\n");
+ return ret_val;
+ }
+
+ /* At this point we know that we are on copper and we have
+ * auto-negotiated link. These are conditions for checking the link
+ * partner capability register. We use the link partner capability to
+ * determine if TBI Compatibility needs to be turned on or off. If
+ * the link partner advertises any speed in addition to Gigabit, then
+ * we assume that they are GMII-based, and TBI compatibility is not
+ * needed. If no other speeds are advertised, we assume the link
+ * partner is TBI-based, and we turn on TBI Compatibility.
+ */
+ if(hw->tbi_compatibility_en) {
+ if(e1000_read_phy_reg(hw, PHY_LP_ABILITY, &lp_capability) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(lp_capability & (NWAY_LPAR_10T_HD_CAPS |
+ NWAY_LPAR_10T_FD_CAPS |
+ NWAY_LPAR_100TX_HD_CAPS |
+ NWAY_LPAR_100TX_FD_CAPS |
+ NWAY_LPAR_100T4_CAPS)) {
+ /* If our link partner advertises anything in addition to
+ * gigabit, we do not need to enable TBI compatibility.
+ */
+ if(hw->tbi_compatibility_on) {
+ /* If we previously were in the mode, turn it off. */
+ rctl = E1000_READ_REG(hw, RCTL);
+ rctl &= ~E1000_RCTL_SBP;
+ E1000_WRITE_REG(hw, RCTL, rctl);
+ hw->tbi_compatibility_on = FALSE;
+ }
+ } else {
+ /* If TBI compatibility was previously off, turn it on. For
+ * compatibility with a TBI link partner, we will store bad
+ * packets. Some frames have an additional byte on the end and
+ * will look like CRC errors to the hardware.
+ */
+ if(!hw->tbi_compatibility_on) {
+ hw->tbi_compatibility_on = TRUE;
+ rctl = E1000_READ_REG(hw, RCTL);
+ rctl |= E1000_RCTL_SBP;
+ E1000_WRITE_REG(hw, RCTL, rctl);
+ }
+ }
+ }
+ }
+ /* If we don't have link (auto-negotiation failed or link partner cannot
+ * auto-negotiate), the cable is plugged in (we have signal), and our
+ * link partner is not trying to auto-negotiate with us (we are receiving
+ * idles or data), we need to force link up. We also need to give
+ * auto-negotiation time to complete, in case the cable was just plugged
+ * in. The autoneg_failed flag does this.
+ */
+ else if((hw->media_type == e1000_media_type_fiber) &&
+ (!(status & E1000_STATUS_LU)) &&
+ ((ctrl & E1000_CTRL_SWDPIN1) == signal) &&
+ (!(rxcw & E1000_RXCW_C))) {
+ if(hw->autoneg_failed == 0) {
+ hw->autoneg_failed = 1;
+ return 0;
+ }
+ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\r\n");
+
+ /* Disable auto-negotiation in the TXCW register */
+ E1000_WRITE_REG(hw, TXCW, (hw->txcw & ~E1000_TXCW_ANE));
+
+ /* Force link-up and also force full-duplex. */
+ ctrl = E1000_READ_REG(hw, CTRL);
+ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+
+ /* Configure Flow Control after forcing link up. */
+ ret_val = e1000_config_fc_after_link_up(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error configuring flow control\n");
+ return ret_val;
+ }
+ }
+ /* If we are forcing link and we are receiving /C/ ordered sets, re-enable
+ * auto-negotiation in the TXCW register and disable forced link in the
+ * Device Control register in an attempt to auto-negotiate with our link
+ * partner.
+ */
+ else if((hw->media_type == e1000_media_type_fiber) &&
+ (ctrl & E1000_CTRL_SLU) &&
+ (rxcw & E1000_RXCW_C)) {
+ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\r\n");
+ E1000_WRITE_REG(hw, TXCW, hw->txcw);
+ E1000_WRITE_REG(hw, CTRL, (ctrl & ~E1000_CTRL_SLU));
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Detects the current speed and duplex settings of the hardware.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * speed - Speed of the connection
+ * duplex - Duplex setting of the connection
+ *****************************************************************************/
+void
+e1000_get_speed_and_duplex(struct e1000_hw *hw,
+ uint16_t *speed,
+ uint16_t *duplex)
+{
+ uint32_t status;
+
+ DEBUGFUNC("e1000_get_speed_and_duplex");
+
+ if(hw->mac_type >= e1000_82543) {
+ status = E1000_READ_REG(hw, STATUS);
+ if(status & E1000_STATUS_SPEED_1000) {
+ *speed = SPEED_1000;
+ DEBUGOUT("1000 Mbs, ");
+ } else if(status & E1000_STATUS_SPEED_100) {
+ *speed = SPEED_100;
+ DEBUGOUT("100 Mbs, ");
+ } else {
+ *speed = SPEED_10;
+ DEBUGOUT("10 Mbs, ");
+ }
+
+ if(status & E1000_STATUS_FD) {
+ *duplex = FULL_DUPLEX;
+ DEBUGOUT("Full Duplex\r\n");
+ } else {
+ *duplex = HALF_DUPLEX;
+ DEBUGOUT(" Half Duplex\r\n");
+ }
+ } else {
+ DEBUGOUT("1000 Mbs, Full Duplex\r\n");
+ *speed = SPEED_1000;
+ *duplex = FULL_DUPLEX;
+ }
+}
+
+/******************************************************************************
+* Blocks until autoneg completes or times out (~4.5 seconds)
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+int32_t
+e1000_wait_autoneg(struct e1000_hw *hw)
+{
+ uint16_t i;
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_wait_autoneg");
+ DEBUGOUT("Waiting for Auto-Neg to complete.\n");
+
+ /* We will wait for autoneg to complete or 4.5 seconds to expire. */
+ for(i = PHY_AUTO_NEG_TIME; i > 0; i--) {
+ /* Read the MII Status Register and wait for Auto-Neg
+ * Complete bit to be set.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(phy_data & MII_SR_AUTONEG_COMPLETE) {
+ return 0;
+ }
+ msec_delay(100);
+ }
+ return 0;
+}
+
+/******************************************************************************
+* Raises the Management Data Clock
+*
+* hw - Struct containing variables accessed by shared code
+* ctrl - Device control register's current value
+******************************************************************************/
+static void
+e1000_raise_mdi_clk(struct e1000_hw *hw,
+ uint32_t *ctrl)
+{
+ /* Raise the clock input to the Management Data Clock (by setting the MDC
+ * bit), and then delay 2 microseconds.
+ */
+ E1000_WRITE_REG(hw, CTRL, (*ctrl | E1000_CTRL_MDC));
+ E1000_WRITE_FLUSH(hw);
+ udelay(2);
+}
+
+/******************************************************************************
+* Lowers the Management Data Clock
+*
+* hw - Struct containing variables accessed by shared code
+* ctrl - Device control register's current value
+******************************************************************************/
+static void
+e1000_lower_mdi_clk(struct e1000_hw *hw,
+ uint32_t *ctrl)
+{
+ /* Lower the clock input to the Management Data Clock (by clearing the MDC
+ * bit), and then delay 2 microseconds.
+ */
+ E1000_WRITE_REG(hw, CTRL, (*ctrl & ~E1000_CTRL_MDC));
+ E1000_WRITE_FLUSH(hw);
+ udelay(2);
+}
+
+/******************************************************************************
+* Shifts data bits out to the PHY
+*
+* hw - Struct containing variables accessed by shared code
+* data - Data to send out to the PHY
+* count - Number of bits to shift out
+*
+* Bits are shifted out in MSB to LSB order.
+******************************************************************************/
+static void
+e1000_shift_out_mdi_bits(struct e1000_hw *hw,
+ uint32_t data,
+ uint16_t count)
+{
+ uint32_t ctrl;
+ uint32_t mask;
+
+ /* We need to shift "count" number of bits out to the PHY. So, the value
+ * in the "data" parameter will be shifted out to the PHY one bit at a
+ * time. In order to do this, "data" must be broken down into bits.
+ */
+ mask = 0x01;
+ mask <<= (count - 1);
+
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */
+ ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR);
+
+ while(mask) {
+ /* A "1" is shifted out to the PHY by setting the MDIO bit to "1" and
+ * then raising and lowering the Management Data Clock. A "0" is
+ * shifted out to the PHY by setting the MDIO bit to "0" and then
+ * raising and lowering the clock.
+ */
+ if(data & mask) ctrl |= E1000_CTRL_MDIO;
+ else ctrl &= ~E1000_CTRL_MDIO;
+
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+
+ udelay(2);
+
+ e1000_raise_mdi_clk(hw, &ctrl);
+ e1000_lower_mdi_clk(hw, &ctrl);
+
+ mask = mask >> 1;
+ }
+}
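+
+/* Worked example (illustrative): for data = 0x6 and count = 3, mask starts
+ * at 0x4, so the bits appear on MDIO in the order 1, 1, 0 -- MSB first,
+ * with one raise/lower clock cycle per bit.
+ */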
+
+/******************************************************************************
+* Shifts data bits in from the PHY
+*
+* hw - Struct containing variables accessed by shared code
+*
+* Bits are shifted in MSB-first (MSB to LSB order).
+******************************************************************************/
+static uint16_t
+e1000_shift_in_mdi_bits(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint16_t data = 0;
+ uint8_t i;
+
+ /* In order to read a register from the PHY, we need to shift in a total
+ * of 18 bits from the PHY. The first two bit (turnaround) times are used
+ * to avoid contention on the MDIO pin when a read operation is performed.
+ * These two bits are ignored by us and thrown away. Bits are "shifted in"
+ * by raising the input to the Management Data Clock (setting the MDC bit),
+ * and then reading the value of the MDIO bit.
+ */
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ /* Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as input. */
+ ctrl &= ~E1000_CTRL_MDIO_DIR;
+ ctrl &= ~E1000_CTRL_MDIO;
+
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+
+ /* Raise and Lower the clock before reading in the data. This accounts for
+ * the turnaround bits. The first clock occurred when we clocked out the
+ * last bit of the Register Address.
+ */
+ e1000_raise_mdi_clk(hw, &ctrl);
+ e1000_lower_mdi_clk(hw, &ctrl);
+
+ for(data = 0, i = 0; i < 16; i++) {
+ data = data << 1;
+ e1000_raise_mdi_clk(hw, &ctrl);
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Check to see if we shifted in a "1". */
+ if(ctrl & E1000_CTRL_MDIO) data |= 1;
+ e1000_lower_mdi_clk(hw, &ctrl);
+ }
+
+ e1000_raise_mdi_clk(hw, &ctrl);
+ e1000_lower_mdi_clk(hw, &ctrl);
+
+ return data;
+}
+
+/*****************************************************************************
+* Reads the value from a PHY register
+*
+* hw - Struct containing variables accessed by shared code
+* reg_addr - address of the PHY register to read
+******************************************************************************/
+int32_t
+e1000_read_phy_reg(struct e1000_hw *hw,
+ uint32_t reg_addr,
+ uint16_t *phy_data)
+{
+ uint32_t i;
+ uint32_t mdic = 0;
+ const uint32_t phy_addr = 1;
+
+ DEBUGFUNC("e1000_read_phy_reg");
+
+ if(reg_addr > MAX_PHY_REG_ADDRESS) {
+ DEBUGOUT1("PHY Address %d is out of range\n", reg_addr);
+ return -E1000_ERR_PARAM;
+ }
+
+ if(hw->mac_type > e1000_82543) {
+ /* Set up Op-code, Phy Address, and register address in the MDI
+ * Control register. The MAC will take care of interfacing with the
+ * PHY to retrieve the desired data.
+ */
+ mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) |
+ (phy_addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_READ));
+
+ E1000_WRITE_REG(hw, MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read completed */
+ for(i = 0; i < 64; i++) {
+ udelay(10);
+ mdic = E1000_READ_REG(hw, MDIC);
+ if(mdic & E1000_MDIC_READY) break;
+ }
+ if(!(mdic & E1000_MDIC_READY)) {
+ DEBUGOUT("MDI Read did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ if(mdic & E1000_MDIC_ERROR) {
+ DEBUGOUT("MDI Error\n");
+ return -E1000_ERR_PHY;
+ }
+ *phy_data = (uint16_t) mdic;
+ } else {
+ /* We must first send a preamble through the MDIO pin to signal the
+ * beginning of an MII instruction. This is done by sending 32
+ * consecutive "1" bits.
+ */
+ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE);
+
+ /* Now combine the next few fields that are required for a read
+ * operation. We use this method instead of calling the
+ * e1000_shift_out_mdi_bits routine five different times. The format of
+ * a MII read instruction consists of a shift out of 14 bits and is
+ * defined as follows:
+ * <Preamble><SOF><Op Code><Phy Addr><Reg Addr>
+ * followed by a shift in of 18 bits. The first two bits shifted in
+ * are TurnAround bits used to avoid contention on the MDIO pin when a
+ * READ operation is performed; they are thrown away. The remaining
+ * 16 bits shifted in contain the desired data.
+ */
+ mdic = ((reg_addr) | (phy_addr << 5) |
+ (PHY_OP_READ << 10) | (PHY_SOF << 12));
+
+ e1000_shift_out_mdi_bits(hw, mdic, 14);
+
+ /* Now that we've shifted out the read command to the MII, we need to
+ * "shift in" the 16-bit contents of the requested PHY register (18
+ * bits in total, counting the two turnaround bits).
+ */
+ *phy_data = e1000_shift_in_mdi_bits(hw);
+ }
+ return 0;
+}
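+
+/* Worked example of the bit-banged read frame (illustrative, assuming the
+ * standard MDIO encodings PHY_SOF = 01 and PHY_OP_READ = 10 used by this
+ * driver): for reg_addr = 1 and phy_addr = 1,
+ * mdic = 1 | (1 << 5) | (PHY_OP_READ << 10) | (PHY_SOF << 12), which shifts
+ * out MSB first over 14 bits as <01><10><00001><00001> -- start-of-frame,
+ * read opcode, PHY address, register address -- before the 18-bit shift in.
+ */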
+
+/******************************************************************************
+* Writes a value to a PHY register
+*
+* hw - Struct containing variables accessed by shared code
+* reg_addr - address of the PHY register to write
+* data - data to write to the PHY
+******************************************************************************/
+int32_t
+e1000_write_phy_reg(struct e1000_hw *hw,
+ uint32_t reg_addr,
+ uint16_t phy_data)
+{
+ uint32_t i;
+ uint32_t mdic = 0;
+ const uint32_t phy_addr = 1;
+
+ DEBUGFUNC("e1000_write_phy_reg");
+
+ if(reg_addr > MAX_PHY_REG_ADDRESS) {
+ DEBUGOUT1("PHY Address %d is out of range\n", reg_addr);
+ return -E1000_ERR_PARAM;
+ }
+
+ if(hw->mac_type > e1000_82543) {
+ /* Set up Op-code, Phy Address, register address, and data intended
+ * for the PHY register in the MDI Control register. The MAC will take
+ * care of interfacing with the PHY to send the desired data.
+ */
+ mdic = (((uint32_t) phy_data) |
+ (reg_addr << E1000_MDIC_REG_SHIFT) |
+ (phy_addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_WRITE));
+
+ E1000_WRITE_REG(hw, MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI write completed */
+ for(i = 0; i < 64; i++) {
+ udelay(10);
+ mdic = E1000_READ_REG(hw, MDIC);
+ if(mdic & E1000_MDIC_READY) break;
+ }
+ if(!(mdic & E1000_MDIC_READY)) {
+ DEBUGOUT("MDI Write did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ } else {
+ /* We'll need to use the SW defined pins to shift the write command
+ * out to the PHY. We first send a preamble to the PHY to signal the
+ * beginning of the MII instruction. This is done by sending 32
+ * consecutive "1" bits.
+ */
+ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE);
+
+ /* Now combine the remaining required fields that will indicate a
+ * write operation. We use this method instead of calling the
+ * e1000_shift_out_mdi_bits routine for each field in the command. The
+ * format of a MII write instruction is as follows:
+ * <Preamble><SOF><Op Code><Phy Addr><Reg Addr><Turnaround><Data>.
+ */
+ mdic = ((PHY_TURNAROUND) | (reg_addr << 2) | (phy_addr << 7) |
+ (PHY_OP_WRITE << 12) | (PHY_SOF << 14));
+ mdic <<= 16;
+ mdic |= (uint32_t) phy_data;
+
+ e1000_shift_out_mdi_bits(hw, mdic, 32);
+ }
+ return 0;
+}
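+
+/* Worked example (illustrative, assuming the standard MDIO encodings
+ * PHY_SOF = 01, PHY_OP_WRITE = 01, PHY_TURNAROUND = 10): a write frame is
+ * 32 bits. For reg_addr = 0 and phy_addr = 1 the upper half is
+ * (PHY_SOF << 14) | (PHY_OP_WRITE << 12) | (1 << 7) | PHY_TURNAROUND and
+ * the lower half is the 16 data bits, so the whole frame shifts out MSB
+ * first as <01><01><00001><00000><10><data>.
+ */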
+
+/******************************************************************************
+* Returns the PHY to the power-on reset state
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+void
+e1000_phy_hw_reset(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint32_t ctrl_ext;
+
+ DEBUGFUNC("e1000_phy_hw_reset");
+
+ DEBUGOUT("Resetting Phy...\n");
+
+ if(hw->mac_type > e1000_82543) {
+ /* Read the device control register and assert the E1000_CTRL_PHY_RST
+ * bit. Then, take it out of reset.
+ */
+ ctrl = E1000_READ_REG(hw, CTRL);
+ E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PHY_RST);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(10);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+ } else {
+ /* Read the Extended Device Control Register, assert the PHY_RESET_DIR
+ * bit to put the PHY into reset. Then, take it out of reset.
+ */
+ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR;
+ ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA;
+ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(10);
+ ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA;
+ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
+ E1000_WRITE_FLUSH(hw);
+ }
+ udelay(150);
+}
+
+/******************************************************************************
+* Resets the PHY
+*
+* hw - Struct containing variables accessed by shared code
+*
+* Sets bit 15 of the MII Control register
+******************************************************************************/
+int32_t
+e1000_phy_reset(struct e1000_hw *hw)
+{
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_phy_reset");
+
+ if(e1000_read_phy_reg(hw, PHY_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= MII_CR_RESET;
+ if(e1000_write_phy_reg(hw, PHY_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ udelay(1);
+ return 0;
+}
+
+/******************************************************************************
+* Probes the expected PHY address for known PHY IDs
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+int32_t
+e1000_detect_gig_phy(struct e1000_hw *hw)
+{
+ uint16_t phy_id_high, phy_id_low;
+ boolean_t match = FALSE;
+
+ DEBUGFUNC("e1000_detect_gig_phy");
+
+ /* Read the PHY ID Registers to identify which PHY is onboard. */
+ if(e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ hw->phy_id = ((uint32_t) phy_id_high) << 16;
+ udelay(2);
+ if(e1000_read_phy_reg(hw, PHY_ID2, &phy_id_low) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ hw->phy_id |= (uint32_t) (phy_id_low & PHY_REVISION_MASK);
+ hw->phy_revision = (uint32_t) phy_id_low & ~PHY_REVISION_MASK;
+
+ switch(hw->mac_type) {
+ case e1000_82543:
+ if(hw->phy_id == M88E1000_E_PHY_ID) match = TRUE;
+ break;
+ case e1000_82544:
+ if(hw->phy_id == M88E1000_I_PHY_ID) match = TRUE;
+ break;
+ case e1000_82540:
+ case e1000_82545:
+ case e1000_82546:
+ if(hw->phy_id == M88E1011_I_PHY_ID) match = TRUE;
+ break;
+ default:
+ DEBUGOUT1("Invalid MAC type %d\n", hw->mac_type);
+ return -E1000_ERR_CONFIG;
+ }
+ if(match) {
+ DEBUGOUT1("PHY ID 0x%X detected\n", hw->phy_id);
+ return 0;
+ }
+ DEBUGOUT1("Invalid PHY ID 0x%X\n", hw->phy_id);
+ return -E1000_ERR_PHY;
+}
+
+/******************************************************************************
+* Resets the PHY's DSP
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+static int32_t
+e1000_phy_reset_dsp(struct e1000_hw *hw)
+{
+ int32_t ret_val = -E1000_ERR_PHY;
+ DEBUGFUNC("e1000_phy_reset_dsp");
+
+ do {
+ if(e1000_write_phy_reg(hw, 29, 0x001d) < 0) break;
+ if(e1000_write_phy_reg(hw, 30, 0x00c1) < 0) break;
+ if(e1000_write_phy_reg(hw, 30, 0x0000) < 0) break;
+ ret_val = 0;
+ } while(0);
+
+ if(ret_val < 0) DEBUGOUT("PHY Write Error\n");
+ return ret_val;
+}
+
+/******************************************************************************
+* Get PHY information from various PHY registers
+*
+* hw - Struct containing variables accessed by shared code
+* phy_info - PHY information structure
+******************************************************************************/
+int32_t
+e1000_phy_get_info(struct e1000_hw *hw,
+ struct e1000_phy_info *phy_info)
+{
+ int32_t ret_val = -E1000_ERR_PHY;
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_phy_get_info");
+
+ phy_info->cable_length = e1000_cable_length_undefined;
+ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_undefined;
+ phy_info->cable_polarity = e1000_rev_polarity_undefined;
+ phy_info->polarity_correction = e1000_polarity_reversal_undefined;
+ phy_info->mdix_mode = e1000_auto_x_mode_undefined;
+ phy_info->local_rx = e1000_1000t_rx_status_undefined;
+ phy_info->remote_rx = e1000_1000t_rx_status_undefined;
+
+ if(hw->media_type != e1000_media_type_copper) {
+ DEBUGOUT("PHY info is only valid for copper media\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ do {
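+ /* The PHY status register latches link-state changes; it is read
+ * twice below so that phy_data reflects the current link state.
+ */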
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) break;
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) break;
+ if((phy_data & MII_SR_LINK_STATUS) != MII_SR_LINK_STATUS) {
+ DEBUGOUT("PHY info is only valid if link is up\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0)
+ break;
+ phy_info->extended_10bt_distance =
+ (phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >>
+ M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT;
+ phy_info->polarity_correction =
+ (phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >>
+ M88E1000_PSCR_POLARITY_REVERSAL_SHIFT;
+
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data) < 0)
+ break;
+ phy_info->cable_polarity = (phy_data & M88E1000_PSSR_REV_POLARITY) >>
+ M88E1000_PSSR_REV_POLARITY_SHIFT;
+ phy_info->mdix_mode = (phy_data & M88E1000_PSSR_MDIX) >>
+ M88E1000_PSSR_MDIX_SHIFT;
+ if(phy_data & M88E1000_PSSR_1000MBS) {
+ /* Cable Length Estimation and Local/Remote Receiver Information
+ * are only valid at 1000 Mbps
+ */
+ phy_info->cable_length = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+ M88E1000_PSSR_CABLE_LENGTH_SHIFT);
+ if(e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data) < 0)
+ break;
+ phy_info->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) >>
+ SR_1000T_LOCAL_RX_STATUS_SHIFT;
+ phy_info->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) >>
+ SR_1000T_REMOTE_RX_STATUS_SHIFT;
+ }
+ ret_val = 0;
+ } while(0);
+
+ if(ret_val < 0) DEBUGOUT("PHY Read Error\n");
+ return ret_val;
+}
+
+int32_t
+e1000_validate_mdi_setting(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_validate_mdi_setting");
+
+ if(!hw->autoneg && (hw->mdix == 0 || hw->mdix == 3)) {
+ DEBUGOUT("Invalid MDI setting detected\n");
+ hw->mdix = 1;
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Raises the EEPROM's clock input.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * eecd - EECD's current value
+ *****************************************************************************/
+static void
+e1000_raise_ee_clk(struct e1000_hw *hw,
+ uint32_t *eecd)
+{
+ /* Raise the clock input to the EEPROM (by setting the SK bit), and then
+ * wait 50 microseconds.
+ */
+ *eecd = *eecd | E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, *eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+}
+
+/******************************************************************************
+ * Lowers the EEPROM's clock input.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * eecd - EECD's current value
+ *****************************************************************************/
+static void
+e1000_lower_ee_clk(struct e1000_hw *hw,
+ uint32_t *eecd)
+{
+ /* Lower the clock input to the EEPROM (by clearing the SK bit), and then
+ * wait 50 microseconds.
+ */
+ *eecd = *eecd & ~E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, *eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+}
+
+/******************************************************************************
+ * Shift data bits out to the EEPROM.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * data - data to send to the EEPROM
+ * count - number of bits to shift out
+ *****************************************************************************/
+static void
+e1000_shift_out_ee_bits(struct e1000_hw *hw,
+ uint16_t data,
+ uint16_t count)
+{
+ uint32_t eecd;
+ uint32_t mask;
+
+ /* We need to shift "count" bits out to the EEPROM. So, value in the
+ * "data" parameter will be shifted out to the EEPROM one bit at a time.
+ * In order to do this, "data" must be broken down into bits.
+ */
+ mask = 0x01 << (count - 1);
+ eecd = E1000_READ_REG(hw, EECD);
+ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
+ do {
+ /* A "1" is shifted out to the EEPROM by setting bit "DI" to a "1",
+ * and then raising and then lowering the clock (the SK bit controls
+ * the clock input to the EEPROM). A "0" is shifted out to the EEPROM
+ * by setting "DI" to "0" and then raising and then lowering the clock.
+ */
+ eecd &= ~E1000_EECD_DI;
+
+ if(data & mask)
+ eecd |= E1000_EECD_DI;
+
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+
+ udelay(50);
+
+ e1000_raise_ee_clk(hw, &eecd);
+ e1000_lower_ee_clk(hw, &eecd);
+
+ mask = mask >> 1;
+
+ } while(mask);
+
+ /* We leave the "DI" bit set to "0" when we leave this routine. */
+ eecd &= ~E1000_EECD_DI;
+ E1000_WRITE_REG(hw, EECD, eecd);
+}
+
+/******************************************************************************
+ * Shift data bits in from the EEPROM
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+static uint16_t
+e1000_shift_in_ee_bits(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+ uint32_t i;
+ uint16_t data;
+
+ /* In order to read a register from the EEPROM, we need to shift 16
+ * bits in from the EEPROM. Bits are "shifted in" by raising the clock
+ * input to the EEPROM (setting the SK bit), and then reading the value of
+ * the "DO" bit. During this "shifting in" process the "DI" bit should
+ * always be clear.
+ */
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
+ data = 0;
+
+ for(i = 0; i < 16; i++) {
+ data = data << 1;
+ e1000_raise_ee_clk(hw, &eecd);
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ eecd &= ~(E1000_EECD_DI);
+ if(eecd & E1000_EECD_DO)
+ data |= 1;
+
+ e1000_lower_ee_clk(hw, &eecd);
+ }
+
+ return data;
+}
+
+/******************************************************************************
+ * Prepares EEPROM for access
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Lowers EEPROM clock. Clears input pin. Sets the chip select pin. This
+ * function should be called before issuing a command to the EEPROM.
+ *****************************************************************************/
+static void
+e1000_setup_eeprom(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ /* Clear SK and DI */
+ eecd &= ~(E1000_EECD_SK | E1000_EECD_DI);
+ E1000_WRITE_REG(hw, EECD, eecd);
+
+ /* Set CS */
+ eecd |= E1000_EECD_CS;
+ E1000_WRITE_REG(hw, EECD, eecd);
+}
+
+/******************************************************************************
+ * Returns EEPROM to a "standby" state
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+static void
+e1000_standby_eeprom(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ /* Deselect EEPROM */
+ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+
+ /* Clock high */
+ eecd |= E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+
+ /* Select EEPROM */
+ eecd |= E1000_EECD_CS;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+
+ /* Clock low */
+ eecd &= ~E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+}
+
+/******************************************************************************
+ * Raises then lowers the EEPROM's clock pin
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+static void
+e1000_clock_eeprom(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ /* Rising edge of clock */
+ eecd |= E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+
+ /* Falling edge of clock */
+ eecd &= ~E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+}
+
+/******************************************************************************
+ * Terminates a command by lowering the EEPROM's chip select pin
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+static void
+e1000_cleanup_eeprom(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ eecd &= ~(E1000_EECD_CS | E1000_EECD_DI);
+
+ E1000_WRITE_REG(hw, EECD, eecd);
+
+ e1000_clock_eeprom(hw);
+}
+
+/******************************************************************************
+ * Reads a 16 bit word from the EEPROM.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - offset of word in the EEPROM to read
+ * data - word read from the EEPROM
+ *****************************************************************************/
+int32_t
+e1000_read_eeprom(struct e1000_hw *hw,
+ uint16_t offset,
+ uint16_t *data)
+{
+ uint32_t eecd;
+ uint32_t i = 0;
+ boolean_t large_eeprom = FALSE;
+
+ DEBUGFUNC("e1000_read_eeprom");
+
+ /* Request EEPROM Access */
+ if(hw->mac_type > e1000_82544) {
+ eecd = E1000_READ_REG(hw, EECD);
+ if(eecd & E1000_EECD_SIZE) large_eeprom = TRUE;
+ eecd |= E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ eecd = E1000_READ_REG(hw, EECD);
+ while((!(eecd & E1000_EECD_GNT)) && (i < 100)) {
+ i++;
+ udelay(5);
+ eecd = E1000_READ_REG(hw, EECD);
+ }
+ if(!(eecd & E1000_EECD_GNT)) {
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ DEBUGOUT("Could not acquire EEPROM grant\n");
+ return -E1000_ERR_EEPROM;
+ }
+ }
+
+ /* Prepare the EEPROM for reading */
+ e1000_setup_eeprom(hw);
+
+ /* Send the READ command (opcode + addr) */
+ e1000_shift_out_ee_bits(hw, EEPROM_READ_OPCODE, 3);
+ if(large_eeprom) {
+ /* If we have a 256 word EEPROM, there are 8 address bits */
+ e1000_shift_out_ee_bits(hw, offset, 8);
+ } else {
+ /* If we have a 64 word EEPROM, there are 6 address bits */
+ e1000_shift_out_ee_bits(hw, offset, 6);
+ }
+
+ /* Read the data */
+ *data = e1000_shift_in_ee_bits(hw);
+
+ /* End this read operation */
+ e1000_standby_eeprom(hw);
+
+ /* Stop requesting EEPROM access */
+ if(hw->mac_type > e1000_82544) {
+ eecd = E1000_READ_REG(hw, EECD);
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ }
+
+ return 0;
+}
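+
+/* Illustrative usage sketch: any word can be fetched this way, e.g. the
+ * checksum word that e1000_validate_eeprom_checksum sums over:
+ *
+ *     uint16_t sum_word;
+ *     if(e1000_read_eeprom(hw, EEPROM_CHECKSUM_REG, &sum_word) < 0)
+ *         return -E1000_ERR_EEPROM;
+ */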
+
+/******************************************************************************
+ * Verifies that the EEPROM has a valid checksum
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Reads the first 64 16 bit words of the EEPROM and sums the values read.
+ * If the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is
+ * valid.
+ *****************************************************************************/
+int32_t
+e1000_validate_eeprom_checksum(struct e1000_hw *hw)
+{
+ uint16_t checksum = 0;
+ uint16_t i, eeprom_data;
+
+ DEBUGFUNC("e1000_validate_eeprom_checksum");
+
+ for(i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) {
+ if(e1000_read_eeprom(hw, i, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ checksum += eeprom_data;
+ }
+
+ if(checksum == (uint16_t) EEPROM_SUM) {
+ return 0;
+ } else {
+ DEBUGOUT("EEPROM Checksum Invalid\n");
+ return -E1000_ERR_EEPROM;
+ }
+}
+
+/******************************************************************************
+ * Calculates the EEPROM checksum and writes it to the EEPROM
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Sums the first 63 16 bit words of the EEPROM. Subtracts the sum from 0xBABA.
+ * Writes the difference to word offset 63 of the EEPROM.
+ *****************************************************************************/
+int32_t
+e1000_update_eeprom_checksum(struct e1000_hw *hw)
+{
+ uint16_t checksum = 0;
+ uint16_t i, eeprom_data;
+
+ DEBUGFUNC("e1000_update_eeprom_checksum");
+
+ for(i = 0; i < EEPROM_CHECKSUM_REG; i++) {
+ if(e1000_read_eeprom(hw, i, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ checksum += eeprom_data;
+ }
+ checksum = (uint16_t) EEPROM_SUM - checksum;
+ if(e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, checksum) < 0) {
+ DEBUGOUT("EEPROM Write Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ return 0;
+}
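+
+/* Worked example (illustrative): if words 0-62 sum to 0x1234, the value
+ * written to word 63 is 0xBABA - 0x1234 = 0xA886, so that a subsequent
+ * sum over words 0-63 yields exactly 0xBABA (EEPROM_SUM).
+ */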
+
+/******************************************************************************
+ * Writes a 16 bit word to a given offset in the EEPROM.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - offset within the EEPROM to be written to
+ * data - 16 bit word to be written to the EEPROM
+ *
+ * If e1000_update_eeprom_checksum is not called after this function, the
+ * EEPROM will most likely contain an invalid checksum.
+ *****************************************************************************/
+int32_t
+e1000_write_eeprom(struct e1000_hw *hw,
+ uint16_t offset,
+ uint16_t data)
+{
+ uint32_t eecd;
+ uint32_t i = 0;
+ int32_t status = 0;
+ boolean_t large_eeprom = FALSE;
+
+ DEBUGFUNC("e1000_write_eeprom");
+
+ /* Request EEPROM Access */
+ if(hw->mac_type > e1000_82544) {
+ eecd = E1000_READ_REG(hw, EECD);
+ if(eecd & E1000_EECD_SIZE) large_eeprom = TRUE;
+ eecd |= E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ eecd = E1000_READ_REG(hw, EECD);
+ while((!(eecd & E1000_EECD_GNT)) && (i < 100)) {
+ i++;
+ udelay(5);
+ eecd = E1000_READ_REG(hw, EECD);
+ }
+ if(!(eecd & E1000_EECD_GNT)) {
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ DEBUGOUT("Could not acquire EEPROM grant\n");
+ return -E1000_ERR_EEPROM;
+ }
+ }
+
+ /* Prepare the EEPROM for writing */
+ e1000_setup_eeprom(hw);
+
+ /* Send the 9-bit (or 11-bit on large EEPROM) EWEN (write enable) command
+ * to the EEPROM (5-bit opcode plus 4/6-bit dummy). This puts the EEPROM
+ * into write/erase mode.
+ */
+ e1000_shift_out_ee_bits(hw, EEPROM_EWEN_OPCODE, 5);
+ if(large_eeprom)
+ e1000_shift_out_ee_bits(hw, 0, 6);
+ else
+ e1000_shift_out_ee_bits(hw, 0, 4);
+
+ /* Prepare the EEPROM */
+ e1000_standby_eeprom(hw);
+
+ /* Send the Write command (3-bit opcode + addr) */
+ e1000_shift_out_ee_bits(hw, EEPROM_WRITE_OPCODE, 3);
+ if(large_eeprom)
+ /* If we have a 256 word EEPROM, there are 8 address bits */
+ e1000_shift_out_ee_bits(hw, offset, 8);
+ else
+ /* If we have a 64 word EEPROM, there are 6 address bits */
+ e1000_shift_out_ee_bits(hw, offset, 6);
+
+ /* Send the data */
+ e1000_shift_out_ee_bits(hw, data, 16);
+
+ /* Toggle the CS line. This in effect tells the EEPROM to actually
+ * execute the command in question.
+ */
+ e1000_standby_eeprom(hw);
+
+ /* Now read DO repeatedly until it is high (equal to '1'). The EEPROM
+ * will signal that the command has been completed by raising the DO
+ * signal. If DO does not go high within 10 milliseconds, error out.
+ */
+ for(i = 0; i < 200; i++) {
+ eecd = E1000_READ_REG(hw, EECD);
+ if(eecd & E1000_EECD_DO) break;
+ udelay(50);
+ }
+ if(i == 200) {
+ DEBUGOUT("EEPROM Write did not complete\n");
+ status = -E1000_ERR_EEPROM;
+ }
+
+ /* Recover from write */
+ e1000_standby_eeprom(hw);
+
+ /* Send the 9-bit (or 11-bit on large EEPROM) EWDS (write disable) command
+ * to the EEPROM (5-bit opcode plus 4/6-bit dummy). This takes the EEPROM
+ * out of write/erase mode.
+ */
+ e1000_shift_out_ee_bits(hw, EEPROM_EWDS_OPCODE, 5);
+ if(large_eeprom)
+ e1000_shift_out_ee_bits(hw, 0, 6);
+ else
+ e1000_shift_out_ee_bits(hw, 0, 4);
+
+ /* Done with writing */
+ e1000_cleanup_eeprom(hw);
+
+ /* Stop requesting EEPROM access */
+ if(hw->mac_type > e1000_82544) {
+ eecd = E1000_READ_REG(hw, EECD);
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ }
+
+ return status;
+}
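+
+/* Usage note (illustrative): as the header comment above warns, pair every
+ * write with a checksum update, e.g.
+ *
+ *     if(e1000_write_eeprom(hw, offset, data) == 0)
+ *         e1000_update_eeprom_checksum(hw);
+ */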
+
+/******************************************************************************
+ * Reads the adapter's part number from the EEPROM
+ *
+ * hw - Struct containing variables accessed by shared code
+ * part_num - Adapter's part number
+ *****************************************************************************/
+int32_t
+e1000_read_part_num(struct e1000_hw *hw,
+ uint32_t *part_num)
+{
+ uint16_t offset = EEPROM_PBA_BYTE_1;
+ uint16_t eeprom_data;
+
+ DEBUGFUNC("e1000_read_part_num");
+
+ /* Get word 0 from EEPROM */
+ if(e1000_read_eeprom(hw, offset, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ /* Save word 0 in upper half of part_num */
+ *part_num = ((uint32_t) eeprom_data) << 16;
+
+ /* Get word 1 from EEPROM */
+ if(e1000_read_eeprom(hw, ++offset, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ /* Save word 1 in lower half of part_num */
+ *part_num |= eeprom_data;
+
+ return 0;
+}
+
+/******************************************************************************
+ * Reads the adapter's MAC address from the EEPROM and inverts the LSB for the
+ * second function of dual function devices
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_read_mac_addr(struct e1000_hw * hw)
+{
+ uint16_t offset;
+ uint16_t eeprom_data, i;
+
+ DEBUGFUNC("e1000_read_mac_addr");
+
+ for(i = 0; i < NODE_ADDRESS_SIZE; i += 2) {
+ offset = i >> 1;
+ if(e1000_read_eeprom(hw, offset, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ hw->perm_mac_addr[i] = (uint8_t) (eeprom_data & 0x00FF);
+ hw->perm_mac_addr[i+1] = (uint8_t) (eeprom_data >> 8);
+ }
+ if((hw->mac_type == e1000_82546) &&
+ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) {
+ if(hw->perm_mac_addr[5] & 0x01)
+ hw->perm_mac_addr[5] &= ~(0x01);
+ else
+ hw->perm_mac_addr[5] |= 0x01;
+ }
+ for(i = 0; i < NODE_ADDRESS_SIZE; i++)
+ hw->mac_addr[i] = hw->perm_mac_addr[i];
+ return 0;
+}
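+
+/* Worked example (illustrative): EEPROM words store the address
+ * least-significant byte first, so if word 0 reads back as 0x2211, then
+ * perm_mac_addr[0] = 0x11 and perm_mac_addr[1] = 0x22.
+ */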
+
+/******************************************************************************
+ * Initializes receive address filters.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Places the MAC address in receive address register 0 and clears the rest
+ * of the receive address registers. Clears the multicast table. Assumes
+ * the receiver is in reset when the routine is called.
+ *****************************************************************************/
+void
+e1000_init_rx_addrs(struct e1000_hw *hw)
+{
+ uint32_t i;
+ uint32_t addr_low;
+ uint32_t addr_high;
+
+ DEBUGFUNC("e1000_init_rx_addrs");
+
+ /* Setup the receive address. */
+ DEBUGOUT("Programming MAC Address into RAR[0]\n");
+ addr_low = (hw->mac_addr[0] |
+ (hw->mac_addr[1] << 8) |
+ (hw->mac_addr[2] << 16) | (hw->mac_addr[3] << 24));
+
+ addr_high = (hw->mac_addr[4] |
+ (hw->mac_addr[5] << 8) | E1000_RAH_AV);
+
+ E1000_WRITE_REG_ARRAY(hw, RA, 0, addr_low);
+ E1000_WRITE_REG_ARRAY(hw, RA, 1, addr_high);
+
+ /* Zero out the other 15 receive addresses. */
+ DEBUGOUT("Clearing RAR[1-15]\n");
+ for(i = 1; i < E1000_RAR_ENTRIES; i++) {
+ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
+ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
+ }
+}
+
+/******************************************************************************
+ * Updates the MAC's list of multicast addresses.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * mc_addr_list - the list of new multicast addresses
+ * mc_addr_count - number of addresses
+ * pad - number of bytes between addresses in the list
+ *
+ * The given list replaces any existing list. Clears the last 15 receive
+ * address registers and the multicast table. Uses receive address registers
+ * for the first 15 multicast addresses, and hashes the rest into the
+ * multicast table.
+ *****************************************************************************/
+void
+e1000_mc_addr_list_update(struct e1000_hw *hw,
+ uint8_t *mc_addr_list,
+ uint32_t mc_addr_count,
+ uint32_t pad)
+{
+ uint32_t hash_value;
+ uint32_t i;
+ uint32_t rar_used_count = 1; /* RAR[0] is used for our MAC address */
+
+ DEBUGFUNC("e1000_mc_addr_list_update");
+
+ /* Set the new number of MC addresses that we are being requested to use. */
+ hw->num_mc_addrs = mc_addr_count;
+
+ /* Clear RAR[1-15] */
+ DEBUGOUT(" Clearing RAR[1-15]\n");
+ for(i = rar_used_count; i < E1000_RAR_ENTRIES; i++) {
+ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
+ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
+ }
+
+ /* Clear the MTA */
+ DEBUGOUT(" Clearing MTA\n");
+ for(i = 0; i < E1000_NUM_MTA_REGISTERS; i++) {
+ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+ }
+
+ /* Add the new addresses */
+ for(i = 0; i < mc_addr_count; i++) {
+ DEBUGOUT(" Adding the multicast addresses:\n");
+ DEBUGOUT7(" MC Addr #%d =%.2X %.2X %.2X %.2X %.2X %.2X\n", i,
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad)],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 1],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 2],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 3],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 4],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 5]);
+
+ hash_value = e1000_hash_mc_addr(hw,
+ mc_addr_list +
+ (i * (ETH_LENGTH_OF_ADDRESS + pad)));
+
+ DEBUGOUT1(" Hash value = 0x%03X\n", hash_value);
+
+ /* Place this multicast address in the RAR if there is room,
+ * else put it in the MTA.
+ */
+ if(rar_used_count < E1000_RAR_ENTRIES) {
+ e1000_rar_set(hw,
+ mc_addr_list + (i * (ETH_LENGTH_OF_ADDRESS + pad)),
+ rar_used_count);
+ rar_used_count++;
+ } else {
+ e1000_mta_set(hw, hash_value);
+ }
+ }
+ DEBUGOUT("MC Update Complete\n");
+}
+
+/******************************************************************************
+ * Hashes an address to determine its location in the multicast table
+ *
+ * hw - Struct containing variables accessed by shared code
+ * mc_addr - the multicast address to hash
+ *****************************************************************************/
+uint32_t
+e1000_hash_mc_addr(struct e1000_hw *hw,
+ uint8_t *mc_addr)
+{
+ uint32_t hash_value = 0;
+
+ /* The portion of the address that is used for the hash table is
+ * determined by the mc_filter_type setting.
+ */
+ switch (hw->mc_filter_type) {
+ /* [0] [1] [2] [3] [4] [5]
+ * 01 AA 00 12 34 56
+ * LSB MSB
+ */
+ case 0:
+ /* [47:36] i.e. 0x563 for above example address */
+ hash_value = ((mc_addr[4] >> 4) | (((uint16_t) mc_addr[5]) << 4));
+ break;
+ case 1:
+ /* [46:35] i.e. 0xAC6 for above example address */
+ hash_value = ((mc_addr[4] >> 3) | (((uint16_t) mc_addr[5]) << 5));
+ break;
+ case 2:
+ /* [45:34] i.e. 0x58D for above example address */
+ hash_value = ((mc_addr[4] >> 2) | (((uint16_t) mc_addr[5]) << 6));
+ break;
+ case 3:
+ /* [43:32] i.e. 0x634 for above example address */
+ hash_value = ((mc_addr[4]) | (((uint16_t) mc_addr[5]) << 8));
+ break;
+ }
+
+ hash_value &= 0xFFF;
+ return hash_value;
+}
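+
+/* Worked check of case 0 (illustrative): for the example address above,
+ * mc_addr[4] = 0x34 and mc_addr[5] = 0x56, so hash_value =
+ * (0x34 >> 4) | (0x56 << 4) = 0x003 | 0x560 = 0x563, matching [47:36].
+ */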
+
+/******************************************************************************
+ * Sets the bit in the multicast table corresponding to the hash value.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * hash_value - Multicast address hash value
+ *****************************************************************************/
+void
+e1000_mta_set(struct e1000_hw *hw,
+ uint32_t hash_value)
+{
+ uint32_t hash_bit, hash_reg;
+ uint32_t mta;
+ uint32_t temp;
+
+ /* The MTA is a register array of 128 32-bit registers.
+ * It is treated like an array of 4096 bits. We want to set
+ * bit BitArray[hash_value]. So we figure out what register
+ * the bit is in, read it, OR in the new bit, then write
+ * back the new value. The register is determined by the
+ * upper 7 bits of the hash value and the bit within that
+ * register is determined by the lower 5 bits of the value.
+ */
+ hash_reg = (hash_value >> 5) & 0x7F;
+ hash_bit = hash_value & 0x1F;
+
+ mta = E1000_READ_REG_ARRAY(hw, MTA, hash_reg);
+
+ mta |= (1 << hash_bit);
+
+ /* If we are on an 82544 and we are trying to write an odd offset
+ * in the MTA, save off the previous entry before writing and
+ * restore the old value after writing.
+ */
+ if((hw->mac_type == e1000_82544) && ((hash_reg & 0x1) == 1)) {
+ temp = E1000_READ_REG_ARRAY(hw, MTA, (hash_reg - 1));
+ E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta);
+ E1000_WRITE_REG_ARRAY(hw, MTA, (hash_reg - 1), temp);
+ } else {
+ E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta);
+ }
+}
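+
+/* Worked example (illustrative): hash_value 0x563 selects hash_reg =
+ * (0x563 >> 5) & 0x7F = 0x2B and hash_bit = 0x563 & 0x1F = 0x03, so bit 3
+ * of MTA register 43 is set.
+ */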
+
+/******************************************************************************
+ * Puts an ethernet address into a receive address register.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * addr - Address to put into receive address register
+ * index - Receive address register to write
+ *****************************************************************************/
+void
+e1000_rar_set(struct e1000_hw *hw,
+ uint8_t *addr,
+ uint32_t index)
+{
+ uint32_t rar_low, rar_high;
+
+ /* HW expects these in little endian so we reverse the byte order
+ * from network order (big endian) to little endian
+ */
+ rar_low = ((uint32_t) addr[0] |
+ ((uint32_t) addr[1] << 8) |
+ ((uint32_t) addr[2] << 16) | ((uint32_t) addr[3] << 24));
+
+ rar_high = ((uint32_t) addr[4] | ((uint32_t) addr[5] << 8) | E1000_RAH_AV);
+
+ E1000_WRITE_REG_ARRAY(hw, RA, (index << 1), rar_low);
+ E1000_WRITE_REG_ARRAY(hw, RA, ((index << 1) + 1), rar_high);
+}
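+
+/* Worked example (illustrative): for addr = 00:11:22:33:44:55 this packs
+ * rar_low = 0x33221100 and rar_high = 0x5544 | E1000_RAH_AV, i.e. the
+ * network-order bytes land little-endian in the register pair.
+ */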
+
+/******************************************************************************
+ * Writes a value to the specified offset in the VLAN filter table.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - Offset in VLAN filter table to write
+ * value - Value to write into VLAN filter table
+ *****************************************************************************/
+void
+e1000_write_vfta(struct e1000_hw *hw,
+ uint32_t offset,
+ uint32_t value)
+{
+ uint32_t temp;
+
+ if((hw->mac_type == e1000_82544) && ((offset & 0x1) == 1)) {
+ temp = E1000_READ_REG_ARRAY(hw, VFTA, (offset - 1));
+ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value);
+ E1000_WRITE_REG_ARRAY(hw, VFTA, (offset - 1), temp);
+ } else {
+ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value);
+ }
+}
+
+/******************************************************************************
+ * Clears the VLAN filter table
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_clear_vfta(struct e1000_hw *hw)
+{
+ uint32_t offset;
+
+ for(offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++)
+ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, 0);
+}
+
+static int32_t
+e1000_id_led_init(struct e1000_hw * hw)
+{
+ uint32_t ledctl;
+ const uint32_t ledctl_mask = 0x000000FF;
+ const uint32_t ledctl_on = E1000_LEDCTL_MODE_LED_ON;
+ const uint32_t ledctl_off = E1000_LEDCTL_MODE_LED_OFF;
+ uint16_t eeprom_data, i, temp;
+ const uint16_t led_mask = 0x0F;
+
+ DEBUGFUNC("e1000_id_led_init");
+
+ if(hw->mac_type < e1000_82540) {
+ /* Nothing to do */
+ return 0;
+ }
+
+ ledctl = E1000_READ_REG(hw, LEDCTL);
+ hw->ledctl_default = ledctl;
+ hw->ledctl_mode1 = hw->ledctl_default;
+ hw->ledctl_mode2 = hw->ledctl_default;
+
+ if(e1000_read_eeprom(hw, EEPROM_ID_LED_SETTINGS, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ if((eeprom_data == ID_LED_RESERVED_0000) ||
+ (eeprom_data == ID_LED_RESERVED_FFFF)) eeprom_data = ID_LED_DEFAULT;
+ for(i = 0; i < 4; i++) {
+ temp = (eeprom_data >> (i << 2)) & led_mask;
+ switch(temp) {
+ case ID_LED_ON1_DEF2:
+ case ID_LED_ON1_ON2:
+ case ID_LED_ON1_OFF2:
+ hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+ hw->ledctl_mode1 |= ledctl_on << (i << 3);
+ break;
+ case ID_LED_OFF1_DEF2:
+ case ID_LED_OFF1_ON2:
+ case ID_LED_OFF1_OFF2:
+ hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+ hw->ledctl_mode1 |= ledctl_off << (i << 3);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+ switch(temp) {
+ case ID_LED_DEF1_ON2:
+ case ID_LED_ON1_ON2:
+ case ID_LED_OFF1_ON2:
+ hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+ hw->ledctl_mode2 |= ledctl_on << (i << 3);
+ break;
+ case ID_LED_DEF1_OFF2:
+ case ID_LED_ON1_OFF2:
+ case ID_LED_OFF1_OFF2:
+ hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+ hw->ledctl_mode2 |= ledctl_off << (i << 3);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Prepares SW controllable LED for use and saves the current state of the LED.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_setup_led(struct e1000_hw *hw)
+{
+ uint32_t ledctl;
+
+ DEBUGFUNC("e1000_setup_led");
+
+ switch(hw->device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ /* No setup necessary */
+ break;
+ case E1000_DEV_ID_82545EM_FIBER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ ledctl = E1000_READ_REG(hw, LEDCTL);
+ /* Save current LEDCTL settings */
+ hw->ledctl_default = ledctl;
+ /* Turn off LED0 */
+ ledctl &= ~(E1000_LEDCTL_LED0_IVRT |
+ E1000_LEDCTL_LED0_BLINK |
+ E1000_LEDCTL_LED0_MODE_MASK);
+ ledctl |= (E1000_LEDCTL_MODE_LED_OFF << E1000_LEDCTL_LED0_MODE_SHIFT);
+ E1000_WRITE_REG(hw, LEDCTL, ledctl);
+ break;
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82546EB_COPPER:
+ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1);
+ break;
+ default:
+ DEBUGOUT("Invalid device ID\n");
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Restores the saved state of the SW controllable LED.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_cleanup_led(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_cleanup_led");
+
+ switch(hw->device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ /* No cleanup necessary */
+ break;
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82545EM_FIBER:
+ case E1000_DEV_ID_82546EB_COPPER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ /* Restore LEDCTL settings */
+ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_default);
+ break;
+ default:
+ DEBUGOUT("Invalid device ID\n");
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Turns on the software controllable LED
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_led_on(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+
+ DEBUGFUNC("e1000_led_on");
+
+ switch(hw->device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Set SW Definable Pin 0 to turn on the LED */
+ ctrl |= E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ break;
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ case E1000_DEV_ID_82545EM_FIBER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Clear SW Definable Pin 0 to turn on the LED */
+ ctrl &= ~E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ break;
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82546EB_COPPER:
+ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode2);
+ break;
+ default:
+ DEBUGOUT("Invalid device ID\n");
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Turns off the software controllable LED
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_led_off(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+
+ DEBUGFUNC("e1000_led_off");
+
+ switch(hw->device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Clear SW Definable Pin 0 to turn off the LED */
+ ctrl &= ~E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ break;
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ case E1000_DEV_ID_82545EM_FIBER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Set SW Definable Pin 0 to turn off the LED */
+ ctrl |= E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ break;
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82546EB_COPPER:
+ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1);
+ break;
+ default:
+ DEBUGOUT("Invalid device ID\n");
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Clears all hardware statistics counters.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_clear_hw_cntrs(struct e1000_hw *hw)
+{
+ volatile uint32_t temp;
+
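+ /* These statistics registers are clear-on-read: reading each one
+ * resets it. temp is volatile so the reads are not optimized away.
+ */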
+ temp = E1000_READ_REG(hw, CRCERRS);
+ temp = E1000_READ_REG(hw, SYMERRS);
+ temp = E1000_READ_REG(hw, MPC);
+ temp = E1000_READ_REG(hw, SCC);
+ temp = E1000_READ_REG(hw, ECOL);
+ temp = E1000_READ_REG(hw, MCC);
+ temp = E1000_READ_REG(hw, LATECOL);
+ temp = E1000_READ_REG(hw, COLC);
+ temp = E1000_READ_REG(hw, DC);
+ temp = E1000_READ_REG(hw, SEC);
+ temp = E1000_READ_REG(hw, RLEC);
+ temp = E1000_READ_REG(hw, XONRXC);
+ temp = E1000_READ_REG(hw, XONTXC);
+ temp = E1000_READ_REG(hw, XOFFRXC);
+ temp = E1000_READ_REG(hw, XOFFTXC);
+ temp = E1000_READ_REG(hw, FCRUC);
+ temp = E1000_READ_REG(hw, PRC64);
+ temp = E1000_READ_REG(hw, PRC127);
+ temp = E1000_READ_REG(hw, PRC255);
+ temp = E1000_READ_REG(hw, PRC511);
+ temp = E1000_READ_REG(hw, PRC1023);
+ temp = E1000_READ_REG(hw, PRC1522);
+ temp = E1000_READ_REG(hw, GPRC);
+ temp = E1000_READ_REG(hw, BPRC);
+ temp = E1000_READ_REG(hw, MPRC);
+ temp = E1000_READ_REG(hw, GPTC);
+ temp = E1000_READ_REG(hw, GORCL);
+ temp = E1000_READ_REG(hw, GORCH);
+ temp = E1000_READ_REG(hw, GOTCL);
+ temp = E1000_READ_REG(hw, GOTCH);
+ temp = E1000_READ_REG(hw, RNBC);
+ temp = E1000_READ_REG(hw, RUC);
+ temp = E1000_READ_REG(hw, RFC);
+ temp = E1000_READ_REG(hw, ROC);
+ temp = E1000_READ_REG(hw, RJC);
+ temp = E1000_READ_REG(hw, TORL);
+ temp = E1000_READ_REG(hw, TORH);
+ temp = E1000_READ_REG(hw, TOTL);
+ temp = E1000_READ_REG(hw, TOTH);
+ temp = E1000_READ_REG(hw, TPR);
+ temp = E1000_READ_REG(hw, TPT);
+ temp = E1000_READ_REG(hw, PTC64);
+ temp = E1000_READ_REG(hw, PTC127);
+ temp = E1000_READ_REG(hw, PTC255);
+ temp = E1000_READ_REG(hw, PTC511);
+ temp = E1000_READ_REG(hw, PTC1023);
+ temp = E1000_READ_REG(hw, PTC1522);
+ temp = E1000_READ_REG(hw, MPTC);
+ temp = E1000_READ_REG(hw, BPTC);
+
+ if(hw->mac_type < e1000_82543) return;
+
+ temp = E1000_READ_REG(hw, ALGNERRC);
+ temp = E1000_READ_REG(hw, RXERRC);
+ temp = E1000_READ_REG(hw, TNCRS);
+ temp = E1000_READ_REG(hw, CEXTERR);
+ temp = E1000_READ_REG(hw, TSCTC);
+ temp = E1000_READ_REG(hw, TSCTFC);
+
+ if(hw->mac_type <= e1000_82544) return;
+
+ temp = E1000_READ_REG(hw, MGTPRC);
+ temp = E1000_READ_REG(hw, MGTPDC);
+ temp = E1000_READ_REG(hw, MGTPTC);
+}
+
+/******************************************************************************
+ * Resets Adaptive IFS to its default state.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Call this after e1000_init_hw. You may override the IFS defaults by setting
+ * hw->ifs_params_forced to TRUE. However, you must then initialize
+ * hw->current_ifs_val, ifs_min_val, ifs_max_val, ifs_step_size, and
+ * ifs_ratio before calling this function.
+ *****************************************************************************/
+void
+e1000_reset_adaptive(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_reset_adaptive");
+
+ if(hw->adaptive_ifs) {
+ if(!hw->ifs_params_forced) {
+ hw->current_ifs_val = 0;
+ hw->ifs_min_val = IFS_MIN;
+ hw->ifs_max_val = IFS_MAX;
+ hw->ifs_step_size = IFS_STEP;
+ hw->ifs_ratio = IFS_RATIO;
+ }
+ hw->in_ifs_mode = FALSE;
+ E1000_WRITE_REG(hw, AIT, 0);
+ } else {
+ DEBUGOUT("Not in Adaptive IFS mode!\n");
+ }
+}
+
+/******************************************************************************
+ * Called during the callback/watchdog routine to update the IFS value based
+ * on the ratio of transmits to collisions since the last callback, taken
+ * from hw->tx_packet_delta and hw->collision_delta.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_update_adaptive(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_update_adaptive");
+
+ if(hw->adaptive_ifs) {
+ if((hw->collision_delta * hw->ifs_ratio) >
+ hw->tx_packet_delta) {
+ if(hw->tx_packet_delta > MIN_NUM_XMITS) {
+ hw->in_ifs_mode = TRUE;
+ if(hw->current_ifs_val < hw->ifs_max_val) {
+ if(hw->current_ifs_val == 0)
+ hw->current_ifs_val = hw->ifs_min_val;
+ else
+ hw->current_ifs_val += hw->ifs_step_size;
+ E1000_WRITE_REG(hw, AIT, hw->current_ifs_val);
+ }
+ }
+ } else {
+ if((hw->in_ifs_mode == TRUE) &&
+ (hw->tx_packet_delta <= MIN_NUM_XMITS)) {
+ hw->current_ifs_val = 0;
+ hw->in_ifs_mode = FALSE;
+ E1000_WRITE_REG(hw, AIT, 0);
+ }
+ }
+ } else {
+ DEBUGOUT("Not in Adaptive IFS mode!\n");
+ }
+}
+
+/******************************************************************************
+ * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
+ *
+ * hw - Struct containing variables accessed by shared code
+ * frame_len - The length of the frame in question
+ * mac_addr - The Ethernet destination address of the frame in question
+ *****************************************************************************/
+void
+e1000_tbi_adjust_stats(struct e1000_hw *hw,
+ struct e1000_hw_stats *stats,
+ uint32_t frame_len,
+ uint8_t *mac_addr)
+{
+ uint64_t carry_bit;
+
+ /* First adjust the frame length. */
+ frame_len--;
+ /* We need to adjust the statistics counters, since the hardware
+ * counters overcount this packet as a CRC error and undercount
+ * the packet as a good packet
+ */
+ /* This packet should not be counted as a CRC error. */
+ stats->crcerrs--;
+ /* This packet does count as a Good Packet Received. */
+ stats->gprc++;
+
+ /* Adjust the Good Octets received counters */
+ carry_bit = 0x80000000 & stats->gorcl;
+ stats->gorcl += frame_len;
+ /* If the high bit of Gorcl (the low 32 bits of the Good Octets
+ * Received Count) was one before the addition and is zero after,
+ * the addition carried out of the low word, so add one to Gorch
+ * (Good Octets Received Count High). This could be simplified if
+ * all environments supported 64-bit integers.
+ */
+ if(carry_bit && ((stats->gorcl & 0x80000000) == 0))
+ stats->gorch++;
+ /* Is this a broadcast or multicast? Check broadcast first,
+ * since the test for a multicast frame will test positive on
+ * a broadcast frame.
+ */
+ if((mac_addr[0] == (uint8_t) 0xff) && (mac_addr[1] == (uint8_t) 0xff))
+ /* Broadcast packet */
+ stats->bprc++;
+ else if(*mac_addr & 0x01)
+ /* Multicast packet */
+ stats->mprc++;
+
+ if(frame_len == hw->max_frame_size) {
+ /* In this case, the hardware has overcounted the number of
+ * oversize frames.
+ */
+ if(stats->roc > 0)
+ stats->roc--;
+ }
+
+ /* Adjust the bin counters when the extra byte put the frame in the
+ * wrong bin. Remember that the frame_len was adjusted above.
+ */
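+ /* Example: a frame whose true length is 64 bytes was seen by the
+ * hardware as 65 bytes and counted in prc127, so it is moved here
+ * from prc127 to prc64.
+ */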
+ if(frame_len == 64) {
+ stats->prc64++;
+ stats->prc127--;
+ } else if(frame_len == 127) {
+ stats->prc127++;
+ stats->prc255--;
+ } else if(frame_len == 255) {
+ stats->prc255++;
+ stats->prc511--;
+ } else if(frame_len == 511) {
+ stats->prc511++;
+ stats->prc1023--;
+ } else if(frame_len == 1023) {
+ stats->prc1023++;
+ stats->prc1522--;
+ } else if(frame_len == 1522) {
+ stats->prc1522++;
+ }
+}
+
+/******************************************************************************
+ * Gets the current PCI bus type, speed, and width of the hardware
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_get_bus_info(struct e1000_hw *hw)
+{
+ uint32_t status;
+
+ if(hw->mac_type < e1000_82543) {
+ hw->bus_type = e1000_bus_type_unknown;
+ hw->bus_speed = e1000_bus_speed_unknown;
+ hw->bus_width = e1000_bus_width_unknown;
+ return;
+ }
+
+ status = E1000_READ_REG(hw, STATUS);
+ hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ?
+ e1000_bus_type_pcix : e1000_bus_type_pci;
+ if(hw->bus_type == e1000_bus_type_pci) {
+ hw->bus_speed = (status & E1000_STATUS_PCI66) ?
+ e1000_bus_speed_66 : e1000_bus_speed_33;
+ } else {
+ switch (status & E1000_STATUS_PCIX_SPEED) {
+ case E1000_STATUS_PCIX_SPEED_66:
+ hw->bus_speed = e1000_bus_speed_66;
+ break;
+ case E1000_STATUS_PCIX_SPEED_100:
+ hw->bus_speed = e1000_bus_speed_100;
+ break;
+ case E1000_STATUS_PCIX_SPEED_133:
+ hw->bus_speed = e1000_bus_speed_133;
+ break;
+ default:
+ hw->bus_speed = e1000_bus_speed_reserved;
+ break;
+ }
+ }
+ hw->bus_width = (status & E1000_STATUS_BUS64) ?
+ e1000_bus_width_64 : e1000_bus_width_32;
+}
+
+/******************************************************************************
+ * Reads a value from one of the device's registers using port I/O (as
+ * opposed to memory-mapped I/O). Only 82544 and newer devices support
+ * port I/O.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - offset to read from
+ *****************************************************************************/
+uint32_t
+e1000_read_reg_io(struct e1000_hw *hw,
+ uint32_t offset)
+{
+ uint32_t io_addr = hw->io_base;
+ uint32_t io_data = hw->io_base + 4;
+
+ e1000_io_write(hw, io_addr, offset);
+ return e1000_io_read(hw, io_data);
+}
+
+/******************************************************************************
+ * Writes a value to one of the device's registers using port I/O (as opposed
+ * to memory-mapped I/O). Only 82544 and newer devices support port I/O.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - offset to write to
+ * value - value to write
+ *****************************************************************************/
+void
+e1000_write_reg_io(struct e1000_hw *hw,
+ uint32_t offset,
+ uint32_t value)
+{
+ uint32_t io_addr = hw->io_base;
+ uint32_t io_data = hw->io_base + 4;
+
+ e1000_io_write(hw, io_addr, offset);
+ e1000_io_write(hw, io_data, value);
+}
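+
+/* Illustrative sketch: IOADDR (io_base) and IODATA (io_base + 4) form an
+ * indirect register pair -- the register offset is latched into IOADDR and
+ * the value moves through IODATA, as the two functions above show. A
+ * caller would normally go through the E1000_READ_REG_IO()/
+ * E1000_WRITE_REG_IO() macros from e1000_hw.h rather than passing raw
+ * offsets.
+ */
+#if 0
+static void e1000_io_example(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+
+ ctrl = e1000_read_reg_io(hw, E1000_CTRL); /* E1000_READ_REG_IO(hw, CTRL) */
+ e1000_write_reg_io(hw, E1000_CTRL, ctrl); /* E1000_WRITE_REG_IO(hw, CTRL, ctrl) */
+}
+#endif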
+
diff --git a/xen/drivers/net/e1000/e1000_hw.h b/xen/drivers/net/e1000/e1000_hw.h
new file mode 100644
index 0000000000..812dfd140f
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_hw.h
@@ -0,0 +1,1789 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* e1000_hw.h
+ * Structures, enums, and macros for the MAC
+ */
+
+#ifndef _E1000_HW_H_
+#define _E1000_HW_H_
+
+#include "e1000_osdep.h"
+
+/* Forward declarations of structures used by the shared code */
+struct e1000_hw;
+struct e1000_hw_stats;
+
+/* Enumerated types specific to the e1000 hardware */
+/* Media Access Controllers */
+typedef enum {
+ e1000_undefined = 0,
+ e1000_82542_rev2_0,
+ e1000_82542_rev2_1,
+ e1000_82543,
+ e1000_82544,
+ e1000_82540,
+ e1000_82545,
+ e1000_82546,
+ e1000_num_macs
+} e1000_mac_type;
+
+/* Media Types */
+typedef enum {
+ e1000_media_type_copper = 0,
+ e1000_media_type_fiber = 1,
+ e1000_num_media_types
+} e1000_media_type;
+
+typedef enum {
+ e1000_10_half = 0,
+ e1000_10_full = 1,
+ e1000_100_half = 2,
+ e1000_100_full = 3
+} e1000_speed_duplex_type;
+
+/* Flow Control Settings */
+typedef enum {
+ e1000_fc_none = 0,
+ e1000_fc_rx_pause = 1,
+ e1000_fc_tx_pause = 2,
+ e1000_fc_full = 3,
+ e1000_fc_default = 0xFF
+} e1000_fc_type;
+
+/* PCI bus types */
+typedef enum {
+ e1000_bus_type_unknown = 0,
+ e1000_bus_type_pci,
+ e1000_bus_type_pcix
+} e1000_bus_type;
+
+/* PCI bus speeds */
+typedef enum {
+ e1000_bus_speed_unknown = 0,
+ e1000_bus_speed_33,
+ e1000_bus_speed_66,
+ e1000_bus_speed_100,
+ e1000_bus_speed_133,
+ e1000_bus_speed_reserved
+} e1000_bus_speed;
+
+/* PCI bus widths */
+typedef enum {
+ e1000_bus_width_unknown = 0,
+ e1000_bus_width_32,
+ e1000_bus_width_64
+} e1000_bus_width;
+
+/* PHY status info structure and supporting enums */
+typedef enum {
+ e1000_cable_length_50 = 0,
+ e1000_cable_length_50_80,
+ e1000_cable_length_80_110,
+ e1000_cable_length_110_140,
+ e1000_cable_length_140,
+ e1000_cable_length_undefined = 0xFF
+} e1000_cable_length;
+
+typedef enum {
+ e1000_10bt_ext_dist_enable_normal = 0,
+ e1000_10bt_ext_dist_enable_lower,
+ e1000_10bt_ext_dist_enable_undefined = 0xFF
+} e1000_10bt_ext_dist_enable;
+
+typedef enum {
+ e1000_rev_polarity_normal = 0,
+ e1000_rev_polarity_reversed,
+ e1000_rev_polarity_undefined = 0xFF
+} e1000_rev_polarity;
+
+typedef enum {
+ e1000_polarity_reversal_enabled = 0,
+ e1000_polarity_reversal_disabled,
+ e1000_polarity_reversal_undefined = 0xFF
+} e1000_polarity_reversal;
+
+typedef enum {
+ e1000_auto_x_mode_manual_mdi = 0,
+ e1000_auto_x_mode_manual_mdix,
+ e1000_auto_x_mode_auto1,
+ e1000_auto_x_mode_auto2,
+ e1000_auto_x_mode_undefined = 0xFF
+} e1000_auto_x_mode;
+
+typedef enum {
+ e1000_1000t_rx_status_not_ok = 0,
+ e1000_1000t_rx_status_ok,
+ e1000_1000t_rx_status_undefined = 0xFF
+} e1000_1000t_rx_status;
+
+struct e1000_phy_info {
+ e1000_cable_length cable_length;
+ e1000_10bt_ext_dist_enable extended_10bt_distance;
+ e1000_rev_polarity cable_polarity;
+ e1000_polarity_reversal polarity_correction;
+ e1000_auto_x_mode mdix_mode;
+ e1000_1000t_rx_status local_rx;
+ e1000_1000t_rx_status remote_rx;
+};
+
+struct e1000_phy_stats {
+ uint32_t idle_errors;
+ uint32_t receive_errors;
+};
+
+
+
+/* Error Codes */
+#define E1000_SUCCESS 0
+#define E1000_ERR_EEPROM 1
+#define E1000_ERR_PHY 2
+#define E1000_ERR_CONFIG 3
+#define E1000_ERR_PARAM 4
+#define E1000_ERR_MAC_TYPE 5
+
+/* Function prototypes */
+/* Initialization */
+void e1000_reset_hw(struct e1000_hw *hw);
+int32_t e1000_init_hw(struct e1000_hw *hw);
+int32_t e1000_set_mac_type(struct e1000_hw *hw);
+
+/* Link Configuration */
+int32_t e1000_setup_link(struct e1000_hw *hw);
+int32_t e1000_phy_setup_autoneg(struct e1000_hw *hw);
+void e1000_config_collision_dist(struct e1000_hw *hw);
+int32_t e1000_config_fc_after_link_up(struct e1000_hw *hw);
+int32_t e1000_check_for_link(struct e1000_hw *hw);
+void e1000_get_speed_and_duplex(struct e1000_hw *hw, uint16_t * speed, uint16_t * duplex);
+int32_t e1000_wait_autoneg(struct e1000_hw *hw);
+
+/* PHY */
+int32_t e1000_read_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t *phy_data);
+int32_t e1000_write_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t data);
+void e1000_phy_hw_reset(struct e1000_hw *hw);
+int32_t e1000_phy_reset(struct e1000_hw *hw);
+int32_t e1000_detect_gig_phy(struct e1000_hw *hw);
+int32_t e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info);
+int32_t e1000_validate_mdi_setting(struct e1000_hw *hw);
+
+/* EEPROM Functions */
+int32_t e1000_read_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t *data);
+int32_t e1000_validate_eeprom_checksum(struct e1000_hw *hw);
+int32_t e1000_update_eeprom_checksum(struct e1000_hw *hw);
+int32_t e1000_write_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t data);
+int32_t e1000_read_part_num(struct e1000_hw *hw, uint32_t * part_num);
+int32_t e1000_read_mac_addr(struct e1000_hw * hw);
+
+/* Filters (multicast, vlan, receive) */
+void e1000_init_rx_addrs(struct e1000_hw *hw);
+void e1000_mc_addr_list_update(struct e1000_hw *hw, uint8_t * mc_addr_list, uint32_t mc_addr_count, uint32_t pad);
+uint32_t e1000_hash_mc_addr(struct e1000_hw *hw, uint8_t * mc_addr);
+void e1000_mta_set(struct e1000_hw *hw, uint32_t hash_value);
+void e1000_rar_set(struct e1000_hw *hw, uint8_t * mc_addr, uint32_t rar_index);
+void e1000_write_vfta(struct e1000_hw *hw, uint32_t offset, uint32_t value);
+void e1000_clear_vfta(struct e1000_hw *hw);
+
+/* LED functions */
+int32_t e1000_setup_led(struct e1000_hw *hw);
+int32_t e1000_cleanup_led(struct e1000_hw *hw);
+int32_t e1000_led_on(struct e1000_hw *hw);
+int32_t e1000_led_off(struct e1000_hw *hw);
+
+/* Adaptive IFS Functions */
+
+/* Everything else */
+void e1000_clear_hw_cntrs(struct e1000_hw *hw);
+void e1000_reset_adaptive(struct e1000_hw *hw);
+void e1000_update_adaptive(struct e1000_hw *hw);
+void e1000_tbi_adjust_stats(struct e1000_hw *hw, struct e1000_hw_stats *stats, uint32_t frame_len, uint8_t * mac_addr);
+void e1000_get_bus_info(struct e1000_hw *hw);
+void e1000_pci_set_mwi(struct e1000_hw *hw);
+void e1000_pci_clear_mwi(struct e1000_hw *hw);
+void e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value);
+void e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value);
+/* Port I/O is only supported on 82544 and newer */
+uint32_t e1000_io_read(struct e1000_hw *hw, uint32_t port);
+uint32_t e1000_read_reg_io(struct e1000_hw *hw, uint32_t offset);
+void e1000_io_write(struct e1000_hw *hw, uint32_t port, uint32_t value);
+void e1000_write_reg_io(struct e1000_hw *hw, uint32_t offset, uint32_t value);
+#define E1000_READ_REG_IO(a, reg) \
+ e1000_read_reg_io((a), E1000_##reg)
+#define E1000_WRITE_REG_IO(a, reg, val) \
+ e1000_write_reg_io((a), E1000_##reg, val)
+
+/* PCI Device IDs */
+#define E1000_DEV_ID_82542 0x1000
+#define E1000_DEV_ID_82543GC_FIBER 0x1001
+#define E1000_DEV_ID_82543GC_COPPER 0x1004
+#define E1000_DEV_ID_82544EI_COPPER 0x1008
+#define E1000_DEV_ID_82544EI_FIBER 0x1009
+#define E1000_DEV_ID_82544GC_COPPER 0x100C
+#define E1000_DEV_ID_82544GC_LOM 0x100D
+#define E1000_DEV_ID_82540EM 0x100E
+#define E1000_DEV_ID_82540EM_LOM 0x1015
+#define E1000_DEV_ID_82540EP_LOM 0x1016
+#define E1000_DEV_ID_82540EP 0x1017
+#define E1000_DEV_ID_82540EP_LP 0x101E
+#define E1000_DEV_ID_82545EM_COPPER 0x100F
+#define E1000_DEV_ID_82545EM_FIBER 0x1011
+#define E1000_DEV_ID_82546EB_COPPER 0x1010
+#define E1000_DEV_ID_82546EB_FIBER 0x1012
+#define NUM_DEV_IDS 16
+
+#define NODE_ADDRESS_SIZE 6
+#define ETH_LENGTH_OF_ADDRESS 6
+
+/* MAC decode size is 128K - This is the size of BAR0 */
+#define MAC_DECODE_SIZE (128 * 1024)
+
+#define E1000_82542_2_0_REV_ID 2
+#define E1000_82542_2_1_REV_ID 3
+
+#define SPEED_10 10
+#define SPEED_100 100
+#define SPEED_1000 1000
+#define HALF_DUPLEX 1
+#define FULL_DUPLEX 2
+
+/* The sizes (in bytes) of an Ethernet packet */
+#define ENET_HEADER_SIZE 14
+#define MAXIMUM_ETHERNET_FRAME_SIZE 1518 /* With FCS */
+#define MINIMUM_ETHERNET_FRAME_SIZE 64 /* With FCS */
+#define ETHERNET_FCS_SIZE 4
+#define MAXIMUM_ETHERNET_PACKET_SIZE \
+ (MAXIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE)
+#define MINIMUM_ETHERNET_PACKET_SIZE \
+ (MINIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE)
+#define CRC_LENGTH ETHERNET_FCS_SIZE
+#define MAX_JUMBO_FRAME_SIZE 0x3F00
+
+
+/* 802.1q VLAN Packet Sizes */
+#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMAed) */
+
+/* Ethertype field values */
+#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */
+#define ETHERNET_IP_TYPE 0x0800 /* IP packets */
+#define ETHERNET_ARP_TYPE 0x0806 /* Address Resolution Protocol (ARP) */
+
+/* Packet Header defines */
+#define IP_PROTOCOL_TCP 6
+#define IP_PROTOCOL_UDP 0x11
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register. Each bit is documented below:
+ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ * o RXSEQ = Receive Sequence Error
+ */
+#define POLL_IMS_ENABLE_MASK ( \
+ E1000_IMS_RXDMT0 | \
+ E1000_IMS_RXSEQ)
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register. Each bit is documented below:
+ * o RXT0 = Receiver Timer Interrupt (ring 0)
+ * o TXDW = Transmit Descriptor Written Back
+ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ * o RXSEQ = Receive Sequence Error
+ * o LSC = Link Status Change
+ */
+#define IMS_ENABLE_MASK ( \
+ E1000_IMS_RXT0 | \
+ E1000_IMS_TXDW | \
+ E1000_IMS_RXDMT0 | \
+ E1000_IMS_RXSEQ | \
+ E1000_IMS_LSC)
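+
+/* Illustrative sketch (assumes the E1000_WRITE_REG() accessor declared in
+ * e1000_osdep.h): unmasking all of the causes above in one shot.
+ */
+#if 0
+static inline void e1000_enable_irqs_example(struct e1000_hw *hw)
+{
+ E1000_WRITE_REG(hw, IMS, IMS_ENABLE_MASK);
+}
+#endif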
+
+/* The number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor. We
+ * reserve one of these spots for our directed address, allowing us room for
+ * E1000_RAR_ENTRIES - 1 multicast addresses.
+ */
+#define E1000_RAR_ENTRIES 16
+
+#define MIN_NUMBER_OF_DESCRIPTORS 8
+#define MAX_NUMBER_OF_DESCRIPTORS 0xFFF8
+
+/* Receive Descriptor */
+struct e1000_rx_desc {
+ uint64_t buffer_addr; /* Address of the descriptor's data buffer */
+ uint16_t length; /* Length of data DMAed into data buffer */
+ uint16_t csum; /* Packet checksum */
+ uint8_t status; /* Descriptor status */
+ uint8_t errors; /* Descriptor Errors */
+ uint16_t special;
+};
+
+/* Receive Descriptor bit definitions */
+#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */
+#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */
+#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */
+#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */
+#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */
+#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */
+#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */
+#define E1000_RXD_ERR_CE 0x01 /* CRC Error */
+#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */
+#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */
+#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */
+#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */
+#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */
+#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */
+#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */
+#define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */
+#define E1000_RXD_SPC_PRI_SHIFT 0x000D /* Priority is in upper 3 of 16 */
+#define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */
+#define E1000_RXD_SPC_CFI_SHIFT 0x000C /* CFI is bit 12 */
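+
+/* Illustrative sketch: pulling the 802.1q fields out of the "special"
+ * word of a receive descriptor with the masks and shifts above.
+ */
+#if 0
+static inline uint16_t e1000_rx_vlan_id_example(struct e1000_rx_desc *rx_desc)
+{
+ return rx_desc->special & E1000_RXD_SPC_VLAN_MASK;
+}
+
+static inline uint8_t e1000_rx_priority_example(struct e1000_rx_desc *rx_desc)
+{
+ return (rx_desc->special & E1000_RXD_SPC_PRI_MASK) >> E1000_RXD_SPC_PRI_SHIFT;
+}
+#endif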
+
+/* mask to determine if packets should be dropped due to frame errors */
+#define E1000_RXD_ERR_FRAME_ERR_MASK ( \
+ E1000_RXD_ERR_CE | \
+ E1000_RXD_ERR_SE | \
+ E1000_RXD_ERR_SEQ | \
+ E1000_RXD_ERR_CXE | \
+ E1000_RXD_ERR_RXE)
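+
+/* Illustrative sketch: how a receive path might apply the mask above to
+ * decide whether a completed descriptor carries a damaged frame.
+ */
+#if 0
+static inline int e1000_rx_frame_bad_example(struct e1000_rx_desc *rx_desc)
+{
+ return (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) != 0;
+}
+#endif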
+
+/* Transmit Descriptor */
+struct e1000_tx_desc {
+ uint64_t buffer_addr; /* Address of the descriptor's data buffer */
+ union {
+ uint32_t data;
+ struct {
+ uint16_t length; /* Data buffer length */
+ uint8_t cso; /* Checksum offset */
+ uint8_t cmd; /* Descriptor control */
+ } flags;
+ } lower;
+ union {
+ uint32_t data;
+ struct {
+ uint8_t status; /* Descriptor status */
+ uint8_t css; /* Checksum start */
+ uint16_t special;
+ } fields;
+ } upper;
+};
+
+/* Transmit Descriptor bit definitions */
+#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */
+#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */
+#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */
+#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */
+#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */
+#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */
+#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */
+#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */
+#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */
+#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
+#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */
+#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */
+#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */
+#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */
+#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */
+#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */
+#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */
+#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */
+#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */
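+
+/* Illustrative sketch: filling a legacy transmit descriptor for a
+ * single-buffer frame. EOP marks the last buffer of the frame, IFCS asks
+ * the MAC to append the CRC, and RS requests a status writeback so the
+ * driver can reclaim the buffer once E1000_TXD_STAT_DD appears.
+ */
+#if 0
+static inline void e1000_tx_desc_example(struct e1000_tx_desc *tx_desc,
+ uint64_t dma_addr, uint16_t len)
+{
+ tx_desc->buffer_addr = dma_addr;
+ tx_desc->lower.data = (uint32_t) len | E1000_TXD_CMD_EOP |
+ E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
+ tx_desc->upper.data = 0;
+}
+#endif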
+
+/* Offload Context Descriptor */
+struct e1000_context_desc {
+ union {
+ uint32_t ip_config;
+ struct {
+ uint8_t ipcss; /* IP checksum start */
+ uint8_t ipcso; /* IP checksum offset */
+ uint16_t ipcse; /* IP checksum end */
+ } ip_fields;
+ } lower_setup;
+ union {
+ uint32_t tcp_config;
+ struct {
+ uint8_t tucss; /* TCP checksum start */
+ uint8_t tucso; /* TCP checksum offset */
+ uint16_t tucse; /* TCP checksum end */
+ } tcp_fields;
+ } upper_setup;
+ uint32_t cmd_and_length; /* Descriptor command and payload length */
+ union {
+ uint32_t data;
+ struct {
+ uint8_t status; /* Descriptor status */
+ uint8_t hdr_len; /* Header length */
+ uint16_t mss; /* Maximum segment size */
+ } fields;
+ } tcp_seg_setup;
+};
+
+/* Offload data descriptor */
+struct e1000_data_desc {
+ uint64_t buffer_addr; /* Address of the descriptor's data buffer */
+ union {
+ uint32_t data;
+ struct {
+ uint16_t length; /* Data buffer length */
+ uint8_t typ_len_ext; /* Descriptor type and length extension */
+ uint8_t cmd; /* Descriptor control */
+ } flags;
+ } lower;
+ union {
+ uint32_t data;
+ struct {
+ uint8_t status; /* Descriptor status */
+ uint8_t popts; /* Packet Options */
+ uint16_t special; /* Special field (e.g. VLAN tag) */
+ } fields;
+ } upper;
+};
+
+/* Filters */
+#define E1000_NUM_UNICAST 16 /* Unicast filter entries */
+#define E1000_MC_TBL_SIZE 128 /* Multicast Filter Table (4096 bits) */
+#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */
+
+
+/* Receive Address Register */
+struct e1000_rar {
+ volatile uint32_t low; /* receive address low */
+ volatile uint32_t high; /* receive address high */
+};
+
+/* The number of entries in the Multicast Table Array (MTA). */
+#define E1000_NUM_MTA_REGISTERS 128
+
+/* IPv4 Address Table Entry */
+struct e1000_ipv4_at_entry {
+ volatile uint32_t ipv4_addr; /* IP Address (RW) */
+ volatile uint32_t reserved;
+};
+
+/* Four wakeup IP addresses are supported */
+#define E1000_WAKEUP_IP_ADDRESS_COUNT_MAX 4
+#define E1000_IP4AT_SIZE E1000_WAKEUP_IP_ADDRESS_COUNT_MAX
+#define E1000_IP6AT_SIZE 1
+
+/* IPv6 Address Table Entry */
+struct e1000_ipv6_at_entry {
+ volatile uint8_t ipv6_addr[16];
+};
+
+/* Flexible Filter Length Table Entry */
+struct e1000_fflt_entry {
+ volatile uint32_t length; /* Flexible Filter Length (RW) */
+ volatile uint32_t reserved;
+};
+
+/* Flexible Filter Mask Table Entry */
+struct e1000_ffmt_entry {
+ volatile uint32_t mask; /* Flexible Filter Mask (RW) */
+ volatile uint32_t reserved;
+};
+
+/* Flexible Filter Value Table Entry */
+struct e1000_ffvt_entry {
+ volatile uint32_t value; /* Flexible Filter Value (RW) */
+ volatile uint32_t reserved;
+};
+
+/* Four Flexible Filters are supported */
+#define E1000_FLEXIBLE_FILTER_COUNT_MAX 4
+
+/* Each Flexible Filter is at most 128 (0x80) bytes in length */
+#define E1000_FLEXIBLE_FILTER_SIZE_MAX 128
+
+#define E1000_FFLT_SIZE E1000_FLEXIBLE_FILTER_COUNT_MAX
+#define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
+#define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
+
+/* Register Set. (82543, 82544)
+ *
+ * Registers are defined to be 32 bits and should be accessed as 32 bit values.
+ * These registers are physically located on the NIC, but are mapped into the
+ * host memory address space.
+ *
+ * RW - register is both readable and writable
+ * RO - register is read only
+ * WO - register is write only
+ * R/clr - register is read only and is cleared when read
+ * A - register array
+ */
+#define E1000_CTRL 0x00000 /* Device Control - RW */
+#define E1000_STATUS 0x00008 /* Device Status - RO */
+#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */
+#define E1000_EERD 0x00014 /* EEPROM Read - RW */
+#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */
+#define E1000_MDIC 0x00020 /* MDI Control - RW */
+#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */
+#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */
+#define E1000_FCT 0x00030 /* Flow Control Type - RW */
+#define E1000_VET 0x00038 /* VLAN Ether Type - RW */
+#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */
+#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */
+#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */
+#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */
+#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */
+#define E1000_RCTL 0x00100 /* RX Control - RW */
+#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */
+#define E1000_TXCW 0x00178 /* TX Configuration Word - RW */
+#define E1000_RXCW 0x00180 /* RX Configuration Word - RO */
+#define E1000_TCTL 0x00400 /* TX Control - RW */
+#define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */
+#define E1000_TBT 0x00448 /* TX Burst Timer - RW */
+#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */
+#define E1000_LEDCTL 0x00E00 /* LED Control - RW */
+#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */
+#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */
+#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */
+#define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */
+#define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */
+#define E1000_RDLEN 0x02808 /* RX Descriptor Length - RW */
+#define E1000_RDH 0x02810 /* RX Descriptor Head - RW */
+#define E1000_RDT 0x02818 /* RX Descriptor Tail - RW */
+#define E1000_RDTR 0x02820 /* RX Delay Timer - RW */
+#define E1000_RXDCTL 0x02828 /* RX Descriptor Control - RW */
+#define E1000_RADV 0x0282C /* RX Interrupt Absolute Delay Timer - RW */
+#define E1000_RSRPD 0x02C00 /* RX Small Packet Detect - RW */
+#define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */
+#define E1000_TDBAL 0x03800 /* TX Descriptor Base Address Low - RW */
+#define E1000_TDBAH 0x03804 /* TX Descriptor Base Address High - RW */
+#define E1000_TDLEN 0x03808 /* TX Descriptor Length - RW */
+#define E1000_TDH 0x03810 /* TX Descriptor Head - RW */
+#define E1000_TDT 0x03818 /* TX Descriptor Tail - RW */
+#define E1000_TIDV 0x03820 /* TX Interrupt Delay Value - RW */
+#define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */
+#define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */
+#define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */
+#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */
+#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */
+#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */
+#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */
+#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */
+#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */
+#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */
+#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */
+#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */
+#define E1000_COLC 0x04028 /* Collision Count - R/clr */
+#define E1000_DC 0x04030 /* Defer Count - R/clr */
+#define E1000_TNCRS 0x04034 /* TX-No CRS - R/clr */
+#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */
+#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */
+#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */
+#define E1000_XONRXC 0x04048 /* XON RX Count - R/clr */
+#define E1000_XONTXC 0x0404C /* XON TX Count - R/clr */
+#define E1000_XOFFRXC 0x04050 /* XOFF RX Count - R/clr */
+#define E1000_XOFFTXC 0x04054 /* XOFF TX Count - R/clr */
+#define E1000_FCRUC 0x04058 /* Flow Control RX Unsupported Count- R/clr */
+#define E1000_PRC64 0x0405C /* Packets RX (64 bytes) - R/clr */
+#define E1000_PRC127 0x04060 /* Packets RX (65-127 bytes) - R/clr */
+#define E1000_PRC255 0x04064 /* Packets RX (128-255 bytes) - R/clr */
+#define E1000_PRC511 0x04068 /* Packets RX (256-511 bytes) - R/clr */
+#define E1000_PRC1023 0x0406C /* Packets RX (512-1023 bytes) - R/clr */
+#define E1000_PRC1522 0x04070 /* Packets RX (1024-1522 bytes) - R/clr */
+#define E1000_GPRC 0x04074 /* Good Packets RX Count - R/clr */
+#define E1000_BPRC 0x04078 /* Broadcast Packets RX Count - R/clr */
+#define E1000_MPRC 0x0407C /* Multicast Packets RX Count - R/clr */
+#define E1000_GPTC 0x04080 /* Good Packets TX Count - R/clr */
+#define E1000_GORCL 0x04088 /* Good Octets RX Count Low - R/clr */
+#define E1000_GORCH 0x0408C /* Good Octets RX Count High - R/clr */
+#define E1000_GOTCL 0x04090 /* Good Octets TX Count Low - R/clr */
+#define E1000_GOTCH 0x04094 /* Good Octets TX Count High - R/clr */
+#define E1000_RNBC 0x040A0 /* RX No Buffers Count - R/clr */
+#define E1000_RUC 0x040A4 /* RX Undersize Count - R/clr */
+#define E1000_RFC 0x040A8 /* RX Fragment Count - R/clr */
+#define E1000_ROC 0x040AC /* RX Oversize Count - R/clr */
+#define E1000_RJC 0x040B0 /* RX Jabber Count - R/clr */
+#define E1000_MGTPRC 0x040B4 /* Management Packets RX Count - R/clr */
+#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */
+#define E1000_MGTPTC 0x040BC /* Management Packets TX Count - R/clr */
+#define E1000_TORL 0x040C0 /* Total Octets RX Low - R/clr */
+#define E1000_TORH 0x040C4 /* Total Octets RX High - R/clr */
+#define E1000_TOTL 0x040C8 /* Total Octets TX Low - R/clr */
+#define E1000_TOTH 0x040CC /* Total Octets TX High - R/clr */
+#define E1000_TPR 0x040D0 /* Total Packets RX - R/clr */
+#define E1000_TPT 0x040D4 /* Total Packets TX - R/clr */
+#define E1000_PTC64 0x040D8 /* Packets TX (64 bytes) - R/clr */
+#define E1000_PTC127 0x040DC /* Packets TX (65-127 bytes) - R/clr */
+#define E1000_PTC255 0x040E0 /* Packets TX (128-255 bytes) - R/clr */
+#define E1000_PTC511 0x040E4 /* Packets TX (256-511 bytes) - R/clr */
+#define E1000_PTC1023 0x040E8 /* Packets TX (512-1023 bytes) - R/clr */
+#define E1000_PTC1522 0x040EC /* Packets TX (1024-1522 bytes) - R/clr */
+#define E1000_MPTC 0x040F0 /* Multicast Packets TX Count - R/clr */
+#define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */
+#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */
+#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */
+#define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */
+#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */
+#define E1000_RA 0x05400 /* Receive Address - RW Array */
+#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */
+#define E1000_WUC 0x05800 /* Wakeup Control - RW */
+#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */
+#define E1000_WUS 0x05810 /* Wakeup Status - RO */
+#define E1000_MANC 0x05820 /* Management Control - RW */
+#define E1000_IPAV 0x05838 /* IP Address Valid - RW */
+#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */
+#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */
+#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */
+#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */
+#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */
+#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */
+#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */
+
+/* Register Set (82542)
+ *
+ * Some of the 82542 registers are located at different offsets than they are
+ * in more current versions of the 8254x. Despite the difference in location,
+ * the registers function in the same manner.
+ */
+#define E1000_82542_CTRL E1000_CTRL
+#define E1000_82542_STATUS E1000_STATUS
+#define E1000_82542_EECD E1000_EECD
+#define E1000_82542_EERD E1000_EERD
+#define E1000_82542_CTRL_EXT E1000_CTRL_EXT
+#define E1000_82542_MDIC E1000_MDIC
+#define E1000_82542_FCAL E1000_FCAL
+#define E1000_82542_FCAH E1000_FCAH
+#define E1000_82542_FCT E1000_FCT
+#define E1000_82542_VET E1000_VET
+#define E1000_82542_RA 0x00040
+#define E1000_82542_ICR E1000_ICR
+#define E1000_82542_ITR E1000_ITR
+#define E1000_82542_ICS E1000_ICS
+#define E1000_82542_IMS E1000_IMS
+#define E1000_82542_IMC E1000_IMC
+#define E1000_82542_RCTL E1000_RCTL
+#define E1000_82542_RDTR 0x00108
+#define E1000_82542_RDBAL 0x00110
+#define E1000_82542_RDBAH 0x00114
+#define E1000_82542_RDLEN 0x00118
+#define E1000_82542_RDH 0x00120
+#define E1000_82542_RDT 0x00128
+#define E1000_82542_FCRTH 0x00160
+#define E1000_82542_FCRTL 0x00168
+#define E1000_82542_FCTTV E1000_FCTTV
+#define E1000_82542_TXCW E1000_TXCW
+#define E1000_82542_RXCW E1000_RXCW
+#define E1000_82542_MTA 0x00200
+#define E1000_82542_TCTL E1000_TCTL
+#define E1000_82542_TIPG E1000_TIPG
+#define E1000_82542_TDBAL 0x00420
+#define E1000_82542_TDBAH 0x00424
+#define E1000_82542_TDLEN 0x00428
+#define E1000_82542_TDH 0x00430
+#define E1000_82542_TDT 0x00438
+#define E1000_82542_TIDV 0x00440
+#define E1000_82542_TBT E1000_TBT
+#define E1000_82542_AIT E1000_AIT
+#define E1000_82542_VFTA 0x00600
+#define E1000_82542_LEDCTL E1000_LEDCTL
+#define E1000_82542_PBA E1000_PBA
+#define E1000_82542_RXDCTL E1000_RXDCTL
+#define E1000_82542_RADV E1000_RADV
+#define E1000_82542_RSRPD E1000_RSRPD
+#define E1000_82542_TXDMAC E1000_TXDMAC
+#define E1000_82542_TXDCTL E1000_TXDCTL
+#define E1000_82542_TADV E1000_TADV
+#define E1000_82542_TSPMT E1000_TSPMT
+#define E1000_82542_CRCERRS E1000_CRCERRS
+#define E1000_82542_ALGNERRC E1000_ALGNERRC
+#define E1000_82542_SYMERRS E1000_SYMERRS
+#define E1000_82542_RXERRC E1000_RXERRC
+#define E1000_82542_MPC E1000_MPC
+#define E1000_82542_SCC E1000_SCC
+#define E1000_82542_ECOL E1000_ECOL
+#define E1000_82542_MCC E1000_MCC
+#define E1000_82542_LATECOL E1000_LATECOL
+#define E1000_82542_COLC E1000_COLC
+#define E1000_82542_DC E1000_DC
+#define E1000_82542_TNCRS E1000_TNCRS
+#define E1000_82542_SEC E1000_SEC
+#define E1000_82542_CEXTERR E1000_CEXTERR
+#define E1000_82542_RLEC E1000_RLEC
+#define E1000_82542_XONRXC E1000_XONRXC
+#define E1000_82542_XONTXC E1000_XONTXC
+#define E1000_82542_XOFFRXC E1000_XOFFRXC
+#define E1000_82542_XOFFTXC E1000_XOFFTXC
+#define E1000_82542_FCRUC E1000_FCRUC
+#define E1000_82542_PRC64 E1000_PRC64
+#define E1000_82542_PRC127 E1000_PRC127
+#define E1000_82542_PRC255 E1000_PRC255
+#define E1000_82542_PRC511 E1000_PRC511
+#define E1000_82542_PRC1023 E1000_PRC1023
+#define E1000_82542_PRC1522 E1000_PRC1522
+#define E1000_82542_GPRC E1000_GPRC
+#define E1000_82542_BPRC E1000_BPRC
+#define E1000_82542_MPRC E1000_MPRC
+#define E1000_82542_GPTC E1000_GPTC
+#define E1000_82542_GORCL E1000_GORCL
+#define E1000_82542_GORCH E1000_GORCH
+#define E1000_82542_GOTCL E1000_GOTCL
+#define E1000_82542_GOTCH E1000_GOTCH
+#define E1000_82542_RNBC E1000_RNBC
+#define E1000_82542_RUC E1000_RUC
+#define E1000_82542_RFC E1000_RFC
+#define E1000_82542_ROC E1000_ROC
+#define E1000_82542_RJC E1000_RJC
+#define E1000_82542_MGTPRC E1000_MGTPRC
+#define E1000_82542_MGTPDC E1000_MGTPDC
+#define E1000_82542_MGTPTC E1000_MGTPTC
+#define E1000_82542_TORL E1000_TORL
+#define E1000_82542_TORH E1000_TORH
+#define E1000_82542_TOTL E1000_TOTL
+#define E1000_82542_TOTH E1000_TOTH
+#define E1000_82542_TPR E1000_TPR
+#define E1000_82542_TPT E1000_TPT
+#define E1000_82542_PTC64 E1000_PTC64
+#define E1000_82542_PTC127 E1000_PTC127
+#define E1000_82542_PTC255 E1000_PTC255
+#define E1000_82542_PTC511 E1000_PTC511
+#define E1000_82542_PTC1023 E1000_PTC1023
+#define E1000_82542_PTC1522 E1000_PTC1522
+#define E1000_82542_MPTC E1000_MPTC
+#define E1000_82542_BPTC E1000_BPTC
+#define E1000_82542_TSCTC E1000_TSCTC
+#define E1000_82542_TSCTFC E1000_TSCTFC
+#define E1000_82542_RXCSUM E1000_RXCSUM
+#define E1000_82542_WUC E1000_WUC
+#define E1000_82542_WUFC E1000_WUFC
+#define E1000_82542_WUS E1000_WUS
+#define E1000_82542_MANC E1000_MANC
+#define E1000_82542_IPAV E1000_IPAV
+#define E1000_82542_IP4AT E1000_IP4AT
+#define E1000_82542_IP6AT E1000_IP6AT
+#define E1000_82542_WUPL E1000_WUPL
+#define E1000_82542_WUPM E1000_WUPM
+#define E1000_82542_FFLT E1000_FFLT
+#define E1000_82542_FFMT E1000_FFMT
+#define E1000_82542_FFVT E1000_FFVT
+
+/* Statistics counters collected by the MAC */
+struct e1000_hw_stats {
+ uint64_t crcerrs;
+ uint64_t algnerrc;
+ uint64_t symerrs;
+ uint64_t rxerrc;
+ uint64_t mpc;
+ uint64_t scc;
+ uint64_t ecol;
+ uint64_t mcc;
+ uint64_t latecol;
+ uint64_t colc;
+ uint64_t dc;
+ uint64_t tncrs;
+ uint64_t sec;
+ uint64_t cexterr;
+ uint64_t rlec;
+ uint64_t xonrxc;
+ uint64_t xontxc;
+ uint64_t xoffrxc;
+ uint64_t xofftxc;
+ uint64_t fcruc;
+ uint64_t prc64;
+ uint64_t prc127;
+ uint64_t prc255;
+ uint64_t prc511;
+ uint64_t prc1023;
+ uint64_t prc1522;
+ uint64_t gprc;
+ uint64_t bprc;
+ uint64_t mprc;
+ uint64_t gptc;
+ uint64_t gorcl;
+ uint64_t gorch;
+ uint64_t gotcl;
+ uint64_t gotch;
+ uint64_t rnbc;
+ uint64_t ruc;
+ uint64_t rfc;
+ uint64_t roc;
+ uint64_t rjc;
+ uint64_t mgprc;
+ uint64_t mgpdc;
+ uint64_t mgptc;
+ uint64_t torl;
+ uint64_t torh;
+ uint64_t totl;
+ uint64_t toth;
+ uint64_t tpr;
+ uint64_t tpt;
+ uint64_t ptc64;
+ uint64_t ptc127;
+ uint64_t ptc255;
+ uint64_t ptc511;
+ uint64_t ptc1023;
+ uint64_t ptc1522;
+ uint64_t mptc;
+ uint64_t bptc;
+ uint64_t tsctc;
+ uint64_t tsctfc;
+};
+
+/* Structure containing variables used by the shared code (e1000_hw.c) */
+struct e1000_hw {
+ uint8_t *hw_addr;
+ e1000_mac_type mac_type;
+ e1000_media_type media_type;
+ void *back;
+ e1000_fc_type fc;
+ e1000_bus_speed bus_speed;
+ e1000_bus_width bus_width;
+ e1000_bus_type bus_type;
+ uint32_t io_base;
+ uint32_t phy_id;
+ uint32_t phy_revision;
+ uint32_t phy_addr;
+ uint32_t original_fc;
+ uint32_t txcw;
+ uint32_t autoneg_failed;
+ uint32_t max_frame_size;
+ uint32_t min_frame_size;
+ uint32_t mc_filter_type;
+ uint32_t num_mc_addrs;
+ uint32_t collision_delta;
+ uint32_t tx_packet_delta;
+ uint32_t ledctl_default;
+ uint32_t ledctl_mode1;
+ uint32_t ledctl_mode2;
+ uint16_t autoneg_advertised;
+ uint16_t pci_cmd_word;
+ uint16_t fc_high_water;
+ uint16_t fc_low_water;
+ uint16_t fc_pause_time;
+ uint16_t current_ifs_val;
+ uint16_t ifs_min_val;
+ uint16_t ifs_max_val;
+ uint16_t ifs_step_size;
+ uint16_t ifs_ratio;
+ uint16_t device_id;
+ uint16_t vendor_id;
+ uint16_t subsystem_id;
+ uint16_t subsystem_vendor_id;
+ uint8_t revision_id;
+ uint8_t autoneg;
+ uint8_t mdix;
+ uint8_t forced_speed_duplex;
+ uint8_t wait_autoneg_complete;
+ uint8_t dma_fairness;
+ uint8_t mac_addr[NODE_ADDRESS_SIZE];
+ uint8_t perm_mac_addr[NODE_ADDRESS_SIZE];
+ boolean_t disable_polarity_correction;
+ boolean_t get_link_status;
+ boolean_t tbi_compatibility_en;
+ boolean_t tbi_compatibility_on;
+ boolean_t fc_send_xon;
+ boolean_t report_tx_early;
+ boolean_t adaptive_ifs;
+ boolean_t ifs_params_forced;
+ boolean_t in_ifs_mode;
+};
+
+
+#define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */
+#define E1000_EEPROM_LED_LOGIC 0x0020 /* Led Logic Word */
+
+/* Register Bit Masks */
+/* Device Control */
+#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */
+#define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */
+#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */
+#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */
+#define E1000_CTRL_TME 0x00000010 /* Test mode. 0=normal,1=test */
+#define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */
+#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */
+#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */
+#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */
+#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */
+#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */
+#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */
+#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */
+#define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */
+#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */
+#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */
+#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */
+#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */
+#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */
+#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */
+#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */
+#define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */
+#define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */
+#define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */
+#define E1000_CTRL_RST 0x04000000 /* Global reset */
+#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */
+#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */
+#define E1000_CTRL_RTE 0x20000000 /* Routing tag enable */
+#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */
+#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */
+
+/* Device Status */
+#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */
+#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */
+#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */
+#define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */
+#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */
+#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */
+#define E1000_STATUS_TBIMODE 0x00000020 /* TBI mode */
+#define E1000_STATUS_SPEED_MASK 0x000000C0
+#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */
+#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */
+#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */
+#define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */
+#define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */
+#define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */
+#define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */
+#define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */
+#define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */
+
+/* Constants used to interpret the masked PCI-X bus speed. */
+#define E1000_STATUS_PCIX_SPEED_66 0x00000000 /* PCI-X bus speed 50-66 MHz */
+#define E1000_STATUS_PCIX_SPEED_100 0x00004000 /* PCI-X bus speed 66-100 MHz */
+#define E1000_STATUS_PCIX_SPEED_133 0x00008000 /* PCI-X bus speed 100-133 MHz */
+
+/* EEPROM/Flash Control */
+#define E1000_EECD_SK 0x00000001 /* EEPROM Clock */
+#define E1000_EECD_CS 0x00000002 /* EEPROM Chip Select */
+#define E1000_EECD_DI 0x00000004 /* EEPROM Data In */
+#define E1000_EECD_DO 0x00000008 /* EEPROM Data Out */
+#define E1000_EECD_FWE_MASK 0x00000030
+#define E1000_EECD_FWE_DIS 0x00000010 /* Disable FLASH writes */
+#define E1000_EECD_FWE_EN 0x00000020 /* Enable FLASH writes */
+#define E1000_EECD_FWE_SHIFT 4
+#define E1000_EECD_SIZE 0x00000200 /* EEPROM Size (0=64 word 1=256 word) */
+#define E1000_EECD_REQ 0x00000040 /* EEPROM Access Request */
+#define E1000_EECD_GNT 0x00000080 /* EEPROM Access Grant */
+#define E1000_EECD_PRES 0x00000100 /* EEPROM Present */
+
+/* EEPROM Read */
+#define E1000_EERD_START 0x00000001 /* Start Read */
+#define E1000_EERD_DONE 0x00000010 /* Read Done */
+#define E1000_EERD_ADDR_SHIFT 8
+#define E1000_EERD_ADDR_MASK 0x0000FF00 /* Read Address */
+#define E1000_EERD_DATA_SHIFT 16
+#define E1000_EERD_DATA_MASK 0xFFFF0000 /* Read Data */
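+
+/* Illustrative sketch of the EERD handshake implied by the fields above:
+ * latch the word address with START set, poll for DONE, then extract the
+ * data field. (The shared code bit-bangs the EEPROM through EECD on the
+ * older MACs; this register-based path is shown only to document the
+ * field layout. E1000_READ_REG/E1000_WRITE_REG come from e1000_osdep.h.)
+ */
+#if 0
+static inline uint16_t e1000_eerd_read_example(struct e1000_hw *hw, uint8_t addr)
+{
+ uint32_t eerd;
+
+ E1000_WRITE_REG(hw, EERD,
+ ((uint32_t) addr << E1000_EERD_ADDR_SHIFT) | E1000_EERD_START);
+ do {
+ eerd = E1000_READ_REG(hw, EERD);
+ } while (!(eerd & E1000_EERD_DONE));
+ return (eerd & E1000_EERD_DATA_MASK) >> E1000_EERD_DATA_SHIFT;
+}
+#endif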
+
+/* Extended Device Control */
+#define E1000_CTRL_EXT_GPI0_EN 0x00000001 /* Maps SDP4 to GPI0 */
+#define E1000_CTRL_EXT_GPI1_EN 0x00000002 /* Maps SDP5 to GPI1 */
+#define E1000_CTRL_EXT_PHYINT_EN E1000_CTRL_EXT_GPI1_EN
+#define E1000_CTRL_EXT_GPI2_EN 0x00000004 /* Maps SDP6 to GPI2 */
+#define E1000_CTRL_EXT_GPI3_EN 0x00000008 /* Maps SDP7 to GPI3 */
+#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* Value of SW Defineable Pin 4 */
+#define E1000_CTRL_EXT_SDP5_DATA 0x00000020 /* Value of SW Defineable Pin 5 */
+#define E1000_CTRL_EXT_PHY_INT E1000_CTRL_EXT_SDP5_DATA
+#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* Value of SW Defineable Pin 6 */
+#define E1000_CTRL_EXT_SDP7_DATA 0x00000080 /* Value of SW Defineable Pin 7 */
+#define E1000_CTRL_EXT_SDP4_DIR 0x00000100 /* Direction of SDP4 0=in 1=out */
+#define E1000_CTRL_EXT_SDP5_DIR 0x00000200 /* Direction of SDP5 0=in 1=out */
+#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */
+#define E1000_CTRL_EXT_SDP7_DIR 0x00000800 /* Direction of SDP7 0=in 1=out */
+#define E1000_CTRL_EXT_ASDCHK 0x00001000 /* Initiate an ASD sequence */
+#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */
+#define E1000_CTRL_EXT_IPS 0x00004000 /* Invert Power State */
+#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */
+#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000
+#define E1000_CTRL_EXT_LINK_MODE_TBI 0x00C00000
+#define E1000_CTRL_EXT_WR_WMARK_MASK 0x03000000
+#define E1000_CTRL_EXT_WR_WMARK_256 0x00000000
+#define E1000_CTRL_EXT_WR_WMARK_320 0x01000000
+#define E1000_CTRL_EXT_WR_WMARK_384 0x02000000
+#define E1000_CTRL_EXT_WR_WMARK_448 0x03000000
+
+/* MDI Control */
+#define E1000_MDIC_DATA_MASK 0x0000FFFF
+#define E1000_MDIC_REG_MASK 0x001F0000
+#define E1000_MDIC_REG_SHIFT 16
+#define E1000_MDIC_PHY_MASK 0x03E00000
+#define E1000_MDIC_PHY_SHIFT 21
+#define E1000_MDIC_OP_WRITE 0x04000000
+#define E1000_MDIC_OP_READ 0x08000000
+#define E1000_MDIC_READY 0x10000000
+#define E1000_MDIC_INT_EN 0x20000000
+#define E1000_MDIC_ERROR 0x40000000
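+
+/* Illustrative sketch of an MDI read through the MDIC register: compose
+ * the PHY address, register number, and READ opcode, then poll READY and
+ * check ERROR before using the data field. This is roughly what
+ * e1000_read_phy_reg() does on MACs with MDIC support; E1000_READ_REG/
+ * E1000_WRITE_REG are assumed from e1000_osdep.h.
+ */
+#if 0
+static inline int32_t e1000_mdic_read_example(struct e1000_hw *hw,
+ uint32_t reg, uint16_t *data)
+{
+ uint32_t mdic = (reg << E1000_MDIC_REG_SHIFT) |
+ (hw->phy_addr << E1000_MDIC_PHY_SHIFT) |
+ E1000_MDIC_OP_READ;
+
+ E1000_WRITE_REG(hw, MDIC, mdic);
+ do {
+ mdic = E1000_READ_REG(hw, MDIC);
+ } while (!(mdic & E1000_MDIC_READY));
+ if (mdic & E1000_MDIC_ERROR)
+ return -E1000_ERR_PHY;
+ *data = (uint16_t) (mdic & E1000_MDIC_DATA_MASK);
+ return E1000_SUCCESS;
+}
+#endif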
+
+/* LED Control */
+#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F
+#define E1000_LEDCTL_LED0_MODE_SHIFT 0
+#define E1000_LEDCTL_LED0_IVRT 0x00000040
+#define E1000_LEDCTL_LED0_BLINK 0x00000080
+#define E1000_LEDCTL_LED1_MODE_MASK 0x00000F00
+#define E1000_LEDCTL_LED1_MODE_SHIFT 8
+#define E1000_LEDCTL_LED1_IVRT 0x00004000
+#define E1000_LEDCTL_LED1_BLINK 0x00008000
+#define E1000_LEDCTL_LED2_MODE_MASK 0x000F0000
+#define E1000_LEDCTL_LED2_MODE_SHIFT 16
+#define E1000_LEDCTL_LED2_IVRT 0x00400000
+#define E1000_LEDCTL_LED2_BLINK 0x00800000
+#define E1000_LEDCTL_LED3_MODE_MASK 0x0F000000
+#define E1000_LEDCTL_LED3_MODE_SHIFT 24
+#define E1000_LEDCTL_LED3_IVRT 0x40000000
+#define E1000_LEDCTL_LED3_BLINK 0x80000000
+
+#define E1000_LEDCTL_MODE_LINK_10_1000 0x0
+#define E1000_LEDCTL_MODE_LINK_100_1000 0x1
+#define E1000_LEDCTL_MODE_LINK_UP 0x2
+#define E1000_LEDCTL_MODE_ACTIVITY 0x3
+#define E1000_LEDCTL_MODE_LINK_ACTIVITY 0x4
+#define E1000_LEDCTL_MODE_LINK_10 0x5
+#define E1000_LEDCTL_MODE_LINK_100 0x6
+#define E1000_LEDCTL_MODE_LINK_1000 0x7
+#define E1000_LEDCTL_MODE_PCIX_MODE 0x8
+#define E1000_LEDCTL_MODE_FULL_DUPLEX 0x9
+#define E1000_LEDCTL_MODE_COLLISION 0xA
+#define E1000_LEDCTL_MODE_BUS_SPEED 0xB
+#define E1000_LEDCTL_MODE_BUS_SIZE 0xC
+#define E1000_LEDCTL_MODE_PAUSED 0xD
+#define E1000_LEDCTL_MODE_LED_ON 0xE
+#define E1000_LEDCTL_MODE_LED_OFF 0xF
+
+/* Receive Address */
+#define E1000_RAH_AV 0x80000000 /* Receive address valid */
+
+/* Interrupt Cause Read */
+#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */
+#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */
+#define E1000_ICR_LSC 0x00000004 /* Link Status Change */
+#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */
+#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */
+#define E1000_ICR_RXO 0x00000040 /* rx overrun */
+#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */
+#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */
+#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */
+#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
+#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */
+#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */
+#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */
+#define E1000_ICR_TXD_LOW 0x00008000
+#define E1000_ICR_SRPD 0x00010000
+
+/* Interrupt Cause Set */
+#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */
+#define E1000_ICS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */
+#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */
+#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */
+#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
+#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */
+#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */
+#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
+#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
+#define E1000_ICS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */
+#define E1000_ICS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */
+#define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */
+#define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW
+#define E1000_ICS_SRPD E1000_ICR_SRPD
+
+/* Interrupt Mask Set */
+#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */
+#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */
+#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */
+#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */
+#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
+#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */
+#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */
+#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
+#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
+#define E1000_IMS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */
+#define E1000_IMS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */
+#define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */
+#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW
+#define E1000_IMS_SRPD E1000_ICR_SRPD
+
+/* Interrupt Mask Clear */
+#define E1000_IMC_TXDW E1000_ICR_TXDW /* Transmit desc written back */
+#define E1000_IMC_TXQE E1000_ICR_TXQE /* Transmit Queue empty */
+#define E1000_IMC_LSC E1000_ICR_LSC /* Link Status Change */
+#define E1000_IMC_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */
+#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
+#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */
+#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */
+#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
+#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
+#define E1000_IMC_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */
+#define E1000_IMC_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */
+#define E1000_IMC_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */
+#define E1000_IMC_TXD_LOW E1000_ICR_TXD_LOW
+#define E1000_IMC_SRPD E1000_ICR_SRPD
+
+/* Receive Control */
+#define E1000_RCTL_RST 0x00000001 /* Software reset */
+#define E1000_RCTL_EN 0x00000002 /* enable */
+#define E1000_RCTL_SBP 0x00000004 /* store bad packet */
+#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */
+#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */
+#define E1000_RCTL_LPE 0x00000020 /* long packet enable */
+#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */
+#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */
+#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */
+#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */
+#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */
+#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */
+#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */
+#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */
+#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */
+#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */
+#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */
+#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */
+#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */
+#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */
+#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */
+#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */
+#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */
+#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */
+#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */
+#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */
+#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */
+#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */
+#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */
+#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */
+#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */
+#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */
+#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */
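+
+/* Illustrative sketch: a minimal receiver configuration composed from the
+ * bits above -- receiver enabled, broadcasts accepted, 2048-byte buffers
+ * (BSEX left clear, so the first size table applies).
+ */
+#if 0
+static inline uint32_t e1000_rctl_example(void)
+{
+ return E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_SZ_2048;
+}
+#endif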
+
+/* Receive Descriptor */
+#define E1000_RDT_DELAY 0x0000ffff /* Delay timer (1=1024us) */
+#define E1000_RDT_FPDB 0x80000000 /* Flush descriptor block */
+#define E1000_RDLEN_LEN 0x0007ff80 /* descriptor length */
+#define E1000_RDH_RDH 0x0000ffff /* receive descriptor head */
+#define E1000_RDT_RDT 0x0000ffff /* receive descriptor tail */
+
+/* Flow Control */
+#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */
+#define E1000_FCRTH_XFCE 0x80000000 /* External Flow Control Enable */
+#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */
+#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */
+
+/* Receive Descriptor Control */
+#define E1000_RXDCTL_PTHRESH 0x0000003F /* RXDCTL Prefetch Threshold */
+#define E1000_RXDCTL_HTHRESH 0x00003F00 /* RXDCTL Host Threshold */
+#define E1000_RXDCTL_WTHRESH 0x003F0000 /* RXDCTL Writeback Threshold */
+#define E1000_RXDCTL_GRAN 0x01000000 /* RXDCTL Granularity */
+
+/* Transmit Descriptor Control */
+#define E1000_TXDCTL_PTHRESH 0x000000FF /* TXDCTL Prefetch Threshold */
+#define E1000_TXDCTL_HTHRESH 0x0000FF00 /* TXDCTL Host Threshold */
+#define E1000_TXDCTL_WTHRESH 0x00FF0000 /* TXDCTL Writeback Threshold */
+#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */
+#define E1000_TXDCTL_LWTHRESH 0xFE000000 /* TXDCTL Low Threshold */
+#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */
+
+/* Transmit Configuration Word */
+#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */
+#define E1000_TXCW_HD 0x00000040 /* TXCW half duplex */
+#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */
+#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */
+#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */
+#define E1000_TXCW_RF 0x00003000 /* TXCW remote fault */
+#define E1000_TXCW_NP 0x00008000 /* TXCW next page */
+#define E1000_TXCW_CW 0x0000ffff /* TxConfigWord mask */
+#define E1000_TXCW_TXC 0x40000000 /* Transmit Config control */
+#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */
+
+/* Receive Configuration Word */
+#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */
+#define E1000_RXCW_NC 0x04000000 /* Receive config no carrier */
+#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */
+#define E1000_RXCW_CC 0x10000000 /* Receive config change */
+#define E1000_RXCW_C 0x20000000 /* Receive config */
+#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */
+#define E1000_RXCW_ANC 0x80000000 /* Auto-neg complete */
+
+/* Transmit Control */
+#define E1000_TCTL_RST 0x00000001 /* software reset */
+#define E1000_TCTL_EN 0x00000002 /* enable tx */
+#define E1000_TCTL_BCE 0x00000004 /* busy check enable */
+#define E1000_TCTL_PSP 0x00000008 /* pad short packets */
+#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */
+#define E1000_TCTL_COLD 0x003ff000 /* collision distance */
+#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */
+#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */
+#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */
+#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */
+
+/* Receive Checksum Control */
+#define E1000_RXCSUM_PCSS_MASK 0x000000FF /* Packet Checksum Start */
+#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */
+#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */
+#define E1000_RXCSUM_IPV6OFL 0x00000400 /* IPv6 checksum offload */
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define E1000_WUC_APME 0x00000001 /* APM Enable */
+#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */
+#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */
+#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */
+
+/* Wake Up Filter Control */
+#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
+#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */
+#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */
+#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */
+#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */
+#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */
+#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */
+#define E1000_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */
+#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */
+#define E1000_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */
+#define E1000_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */
+#define E1000_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */
+#define E1000_WUFC_ALL_FILTERS 0x000F00FF /* Mask for all wakeup filters */
+#define E1000_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */
+#define E1000_WUFC_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */
+
+/* Wake Up Status */
+#define E1000_WUS_LNKC 0x00000001 /* Link Status Changed */
+#define E1000_WUS_MAG 0x00000002 /* Magic Packet Received */
+#define E1000_WUS_EX 0x00000004 /* Directed Exact Received */
+#define E1000_WUS_MC 0x00000008 /* Directed Multicast Received */
+#define E1000_WUS_BC 0x00000010 /* Broadcast Received */
+#define E1000_WUS_ARP 0x00000020 /* ARP Request Packet Received */
+#define E1000_WUS_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Received */
+#define E1000_WUS_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Received */
+#define E1000_WUS_FLX0 0x00010000 /* Flexible Filter 0 Match */
+#define E1000_WUS_FLX1 0x00020000 /* Flexible Filter 1 Match */
+#define E1000_WUS_FLX2 0x00040000 /* Flexible Filter 2 Match */
+#define E1000_WUS_FLX3 0x00080000 /* Flexible Filter 3 Match */
+#define E1000_WUS_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */
+
+/* Management Control */
+#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */
+#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */
+#define E1000_MANC_R_ON_FORCE 0x00000004 /* Reset on Force TCO - RO */
+#define E1000_MANC_RMCP_EN 0x00000100 /* Enable RMCP 026Fh Filtering */
+#define E1000_MANC_0298_EN 0x00000200 /* Enable RMCP 0298h Filtering */
+#define E1000_MANC_IPV4_EN 0x00000400 /* Enable IPv4 */
+#define E1000_MANC_IPV6_EN 0x00000800 /* Enable IPv6 */
+#define E1000_MANC_SNAP_EN 0x00001000 /* Accept LLC/SNAP */
+#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */
+#define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery
+ * Filtering */
+#define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */
+#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */
+#define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */
+#define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */
+#define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */
+#define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */
+#define E1000_MANC_SMB_DATA_IN 0x08000000 /* SMBus Data In */
+#define E1000_MANC_SMB_DATA_OUT 0x10000000 /* SMBus Data Out */
+#define E1000_MANC_SMB_CLK_OUT 0x20000000 /* SMBus Clock Out */
+
+#define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */
+#define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */
+
+/* Wake Up Packet Length */
+#define E1000_WUPL_LENGTH_MASK 0x0FFF /* Only the lower 12 bits are valid */
+
+#define E1000_MDALIGN 4096
+
+/* EEPROM Commands */
+#define EEPROM_READ_OPCODE 0x6 /* EEPROM read opcode */
+#define EEPROM_WRITE_OPCODE 0x5 /* EEPROM write opcode */
+#define EEPROM_ERASE_OPCODE 0x7 /* EEPROM erase opcode */
+#define EEPROM_EWEN_OPCODE 0x13 /* EEPROM erase/write enable */
+#define EEPROM_EWDS_OPCODE 0x10 /* EEPROM erase/write disable */
+
+/* EEPROM Word Offsets */
+#define EEPROM_COMPAT 0x0003
+#define EEPROM_ID_LED_SETTINGS 0x0004
+#define EEPROM_INIT_CONTROL1_REG 0x000A
+#define EEPROM_INIT_CONTROL2_REG 0x000F
+#define EEPROM_FLASH_VERSION 0x0032
+#define EEPROM_CHECKSUM_REG 0x003F
+
+/* Word definitions for ID LED Settings */
+#define ID_LED_RESERVED_0000 0x0000
+#define ID_LED_RESERVED_FFFF 0xFFFF
+#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \
+ (ID_LED_OFF1_OFF2 << 8) | \
+ (ID_LED_DEF1_DEF2 << 4) | \
+ (ID_LED_DEF1_DEF2))
+#define ID_LED_DEF1_DEF2 0x1
+#define ID_LED_DEF1_ON2 0x2
+#define ID_LED_DEF1_OFF2 0x3
+#define ID_LED_ON1_DEF2 0x4
+#define ID_LED_ON1_ON2 0x5
+#define ID_LED_ON1_OFF2 0x6
+#define ID_LED_OFF1_DEF2 0x7
+#define ID_LED_OFF1_ON2 0x8
+#define ID_LED_OFF1_OFF2 0x9
+
+/* Mask bits for fields in Word 0x03 of the EEPROM */
+#define EEPROM_COMPAT_SERVER 0x0400
+#define EEPROM_COMPAT_CLIENT 0x0200
+
+/* Mask bits for fields in Word 0x0a of the EEPROM */
+#define EEPROM_WORD0A_ILOS 0x0010
+#define EEPROM_WORD0A_SWDPIO 0x01E0
+#define EEPROM_WORD0A_LRST 0x0200
+#define EEPROM_WORD0A_FD 0x0400
+#define EEPROM_WORD0A_66MHZ 0x0800
+
+/* Mask bits for fields in Word 0x0f of the EEPROM */
+#define EEPROM_WORD0F_PAUSE_MASK 0x3000
+#define EEPROM_WORD0F_PAUSE 0x1000
+#define EEPROM_WORD0F_ASM_DIR 0x2000
+#define EEPROM_WORD0F_ANE 0x0800
+#define EEPROM_WORD0F_SWPDIO_EXT 0x00F0
+
+/* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */
+#define EEPROM_SUM 0xBABA
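+
+/* Illustrative sketch of the rule above, along the lines of what
+ * e1000_validate_eeprom_checksum() must do: sum words 0x00 through
+ * EEPROM_CHECKSUM_REG inclusive (16-bit wraparound) and compare the
+ * result with EEPROM_SUM.
+ */
+#if 0
+static inline int32_t e1000_checksum_example(struct e1000_hw *hw)
+{
+ uint16_t checksum = 0, word;
+ uint16_t i;
+
+ for (i = 0; i <= EEPROM_CHECKSUM_REG; i++) {
+ if (e1000_read_eeprom(hw, i, &word) != E1000_SUCCESS)
+ return -E1000_ERR_EEPROM;
+ checksum += word;
+ }
+ return (checksum == EEPROM_SUM) ? E1000_SUCCESS : -E1000_ERR_EEPROM;
+}
+#endif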
+
+/* EEPROM Map defines (WORD OFFSETS)*/
+#define EEPROM_NODE_ADDRESS_BYTE_0 0
+#define EEPROM_PBA_BYTE_1 8
+
+/* EEPROM Map Sizes (Byte Counts) */
+#define PBA_SIZE 4
+
+/* Collision related configuration parameters */
+#define E1000_COLLISION_THRESHOLD 16
+#define E1000_CT_SHIFT 4
+#define E1000_COLLISION_DISTANCE 64
+#define E1000_FDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE
+#define E1000_HDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE
+#define E1000_GB_HDX_COLLISION_DISTANCE 512
+#define E1000_COLD_SHIFT 12
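+/* Illustrative sketch (editor's addition): these values are merged into the
+ * Transmit Control Register. The threshold half appears verbatim in
+ * e1000_configure_tx() in e1000_main.c; the distance half is presumably
+ * placed the same way by e1000_config_collision_dist():
+ *
+ *	tctl |= (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
+ *	        (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT);
+ */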
+
+/* The number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE 8
+#define REQ_RX_DESCRIPTOR_MULTIPLE 8
+
+/* Default values for the transmit IPG register */
+#define DEFAULT_82542_TIPG_IPGT 10
+#define DEFAULT_82543_TIPG_IPGT_FIBER 9
+#define DEFAULT_82543_TIPG_IPGT_COPPER 8
+
+#define E1000_TIPG_IPGT_MASK 0x000003FF
+#define E1000_TIPG_IPGR1_MASK 0x000FFC00
+#define E1000_TIPG_IPGR2_MASK 0x3FF00000
+
+#define DEFAULT_82542_TIPG_IPGR1 2
+#define DEFAULT_82543_TIPG_IPGR1 8
+#define E1000_TIPG_IPGR1_SHIFT 10
+
+#define DEFAULT_82542_TIPG_IPGR2 10
+#define DEFAULT_82543_TIPG_IPGR2 6
+#define E1000_TIPG_IPGR2_SHIFT 20
+
+#define E1000_TXDMAC_DPP 0x00000001
+
+/* Adaptive IFS defines */
+#define TX_THRESHOLD_START 8
+#define TX_THRESHOLD_INCREMENT 10
+#define TX_THRESHOLD_DECREMENT 1
+#define TX_THRESHOLD_STOP 190
+#define TX_THRESHOLD_DISABLE 0
+#define TX_THRESHOLD_TIMER_MS 10000
+#define MIN_NUM_XMITS 1000
+#define IFS_MAX 80
+#define IFS_STEP 10
+#define IFS_MIN 40
+#define IFS_RATIO 4
+
+/* PBA constants */
+#define E1000_PBA_16K 0x0010 /* 16KB, default TX allocation */
+#define E1000_PBA_24K 0x0018
+#define E1000_PBA_40K 0x0028
+#define E1000_PBA_48K 0x0030 /* 48KB, default RX allocation */
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100
+#define FLOW_CONTROL_TYPE 0x8808
+
+/* The historical defaults for the flow control values are given below. */
+#define FC_DEFAULT_HI_THRESH (0x8000) /* 32KB */
+#define FC_DEFAULT_LO_THRESH (0x4000) /* 16KB */
+#define FC_DEFAULT_TX_TIMER (0x100) /* ~130 us */
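+/* Editor's note: an 802.3x pause timer counts in quanta of 512 bit times,
+ * so at 1 Gb/s one quantum is 512 ns and the default above works out to
+ * 0x100 * 512 ns = ~131 us, hence the "~130 us" annotation.
+ */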
+
+/* PCIX Config space */
+#define PCIX_COMMAND_REGISTER 0xE6
+#define PCIX_STATUS_REGISTER_LO 0xE8
+#define PCIX_STATUS_REGISTER_HI 0xEA
+
+#define PCIX_COMMAND_MMRBC_MASK 0x000C
+#define PCIX_COMMAND_MMRBC_SHIFT 0x2
+#define PCIX_STATUS_HI_MMRBC_MASK 0x0060
+#define PCIX_STATUS_HI_MMRBC_SHIFT 0x5
+#define PCIX_STATUS_HI_MMRBC_4K 0x3
+#define PCIX_STATUS_HI_MMRBC_2K 0x2
+
+
+/* The number of bits that we need to shift right to move the "pause"
+ * bits from the EEPROM (bits 13:12) to the "pause" (bits 8:7) field
+ * in the TXCW register
+ */
+#define PAUSE_SHIFT 5
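+/* Illustrative sketch (editor's addition), e.g.:
+ *	txcw |= (eeprom_word0f & EEPROM_WORD0F_PAUSE_MASK) >> PAUSE_SHIFT;
+ * moves bits 13:12 down to bits 8:7 (13 - 5 = 8, 12 - 5 = 7).
+ */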
+
+/* The number of bits that we need to shift left to move the "SWDPIO"
+ * bits from the EEPROM (bits 8:5) to the "SWDPIO" (bits 25:22) field
+ * in the CTRL register
+ */
+#define SWDPIO_SHIFT 17
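+/* Illustrative sketch (editor's addition), e.g.:
+ *	ctrl |= (eeprom_word0a & EEPROM_WORD0A_SWDPIO) << SWDPIO_SHIFT;
+ * moves bits 8:5 up to bits 25:22 (5 + 17 = 22, 8 + 17 = 25).
+ */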
+
+/* The number of bits that we need to shift left to move the "SWDPIO_EXT"
+ * bits from the EEPROM word F (bits 7:4) to bits 11:8 of the Extended
+ * CTRL register
+ */
+#define SWDPIO__EXT_SHIFT 4
+
+/* The number of bits that we need to shift left to move the "ILOS"
+ * bit from the EEPROM (bit 4) to the "ILOS" (bit 7) field
+ * in the CTRL register
+ */
+#define ILOS_SHIFT 3
+
+
+#define RECEIVE_BUFFER_ALIGN_SIZE (256)
+
+/* The number of milliseconds we wait for auto-negotiation to complete */
+#define LINK_UP_TIMEOUT 500
+
+#define E1000_TX_BUFFER_SIZE ((uint32_t)1514)
+
+/* The carrier extension symbol, as received by the NIC. */
+#define CARRIER_EXTENSION 0x0F
+
+/* TBI_ACCEPT macro definition:
+ *
+ * This macro requires:
+ * adapter = a pointer to struct e1000_hw
+ * status = the 8 bit status field of the RX descriptor with EOP set
+ * errors = the 8 bit error field of the RX descriptor with EOP set
+ * length = the sum of all the length fields of the RX descriptors that
+ * make up the current frame
+ * last_byte = the last byte of the frame DMAed by the hardware
+ * The minimum and maximum acceptable frame lengths are taken from the
+ * adapter's min_frame_size and max_frame_size fields.
+ *
+ * This macro is a conditional that should be used in the interrupt
+ * handler's Rx processing routine when RxErrors have been detected.
+ *
+ * Typical use:
+ * ...
+ * if(TBI_ACCEPT(adapter, status, errors, length, last_byte)) {
+ * accept_frame = TRUE;
+ * e1000_tbi_adjust_stats(adapter, MacAddress);
+ * frame_length--;
+ * } else {
+ * accept_frame = FALSE;
+ * }
+ * ...
+ */
+
+#define TBI_ACCEPT(adapter, status, errors, length, last_byte) \
+ ((adapter)->tbi_compatibility_on && \
+ (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \
+ ((last_byte) == CARRIER_EXTENSION) && \
+ (((status) & E1000_RXD_STAT_VP) ? \
+ (((length) > ((adapter)->min_frame_size - VLAN_TAG_SIZE)) && \
+ ((length) <= ((adapter)->max_frame_size + 1))) : \
+ (((length) > (adapter)->min_frame_size) && \
+ ((length) <= ((adapter)->max_frame_size + VLAN_TAG_SIZE + 1)))))
+
+
+/* Structures, enums, and macros for the PHY */
+
+/* Bit definitions for the Management Data IO (MDIO) and Management Data
+ * Clock (MDC) pins in the Device Control Register.
+ */
+#define E1000_CTRL_PHY_RESET_DIR E1000_CTRL_SWDPIO0
+#define E1000_CTRL_PHY_RESET E1000_CTRL_SWDPIN0
+#define E1000_CTRL_MDIO_DIR E1000_CTRL_SWDPIO2
+#define E1000_CTRL_MDIO E1000_CTRL_SWDPIN2
+#define E1000_CTRL_MDC_DIR E1000_CTRL_SWDPIO3
+#define E1000_CTRL_MDC E1000_CTRL_SWDPIN3
+#define E1000_CTRL_PHY_RESET_DIR4 E1000_CTRL_EXT_SDP4_DIR
+#define E1000_CTRL_PHY_RESET4 E1000_CTRL_EXT_SDP4_DATA
+
+/* PHY 1000 MII Register/Bit Definitions */
+/* PHY Registers defined by IEEE */
+#define PHY_CTRL 0x00 /* Control Register */
+#define PHY_STATUS 0x01 /* Status Register */
+#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */
+#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */
+#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */
+#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */
+#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */
+#define PHY_NEXT_PAGE_TX 0x07 /* Next Page TX */
+#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */
+#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */
+#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */
+#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */
+
+/* M88E1000 Specific Registers */
+#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */
+#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */
+#define M88E1000_INT_ENABLE 0x12 /* Interrupt Enable Register */
+#define M88E1000_INT_STATUS 0x13 /* Interrupt Status Register */
+#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */
+#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */
+
+#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */
+
+/* PHY Control Register */
+#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */
+#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */
+#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */
+#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */
+#define MII_CR_POWER_DOWN 0x0800 /* Power down */
+#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */
+#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */
+#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */
+
+/* PHY Status Register */
+#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */
+#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */
+#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */
+#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */
+#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */
+#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */
+#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */
+#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */
+#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */
+#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */
+#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */
+#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */
+#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */
+#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */
+#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */
+
+/* Autoneg Advertisement Register */
+#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */
+#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */
+#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */
+#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */
+#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */
+#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */
+#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */
+#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */
+#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */
+#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */
+
+/* Link Partner Ability Register (Base Page) */
+#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */
+#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP is 10T Half Duplex Capable */
+#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP is 10T Full Duplex Capable */
+#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP is 100TX Half Duplex Capable */
+#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP is 100TX Full Duplex Capable */
+#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */
+#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */
+#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */
+#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP has detected Remote Fault */
+#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP has rx'd link code word */
+#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */
+
+/* Autoneg Expansion Register */
+#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */
+#define NWAY_ER_PAGE_RXD 0x0002 /* LP Link Code Word Page Received */
+#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* Local device is Next Page able */
+#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP is Next Page able */
+#define NWAY_ER_PAR_DETECT_FAULT 0x0100 /* Parallel Detection Fault */
+
+/* Next Page TX Register */
+#define NPTX_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */
+#define NPTX_TOGGLE 0x0800 /* Toggles between exchanges
+ * of different NP
+ */
+#define NPTX_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg
+ * 0 = cannot comply with msg
+ */
+#define NPTX_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */
+#define NPTX_NEXT_PAGE 0x8000 /* 1 = additional NP will follow
+ * 0 = sending last NP
+ */
+
+/* Link Partner Next Page Register */
+#define LP_RNPR_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */
+#define LP_RNPR_TOGGLE 0x0800 /* Toggles between exchanges
+ * of different NP
+ */
+#define LP_RNPR_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg
+ * 0 = cannot comply with msg
+ */
+#define LP_RNPR_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */
+#define LP_RNPR_ACKNOWLDGE 0x4000 /* 1 = ACK / 0 = NO ACK */
+#define LP_RNPR_NEXT_PAGE 0x8000 /* 1 = additional NP will follow
+ * 0 = sending last NP
+ */
+
+/* 1000BASE-T Control Register */
+#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */
+#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */
+#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */
+#define CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port */
+ /* 0=DTE device */
+#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */
+ /* 0=Configure PHY as Slave */
+#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */
+ /* 0=Automatic Master/Slave config */
+#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */
+#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */
+#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */
+#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */
+#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */
+
+/* 1000BASE-T Status Register */
+#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle errors since last read */
+#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asymmetric pause direction bit */
+#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */
+#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */
+#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */
+#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */
+#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local TX is Master, 0=Slave */
+#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */
+#define SR_1000T_REMOTE_RX_STATUS_SHIFT 12
+#define SR_1000T_LOCAL_RX_STATUS_SHIFT 13
+
+/* Extended Status Register */
+#define IEEE_ESR_1000T_HD_CAPS 0x1000 /* 1000T HD capable */
+#define IEEE_ESR_1000T_FD_CAPS 0x2000 /* 1000T FD capable */
+#define IEEE_ESR_1000X_HD_CAPS 0x4000 /* 1000X HD capable */
+#define IEEE_ESR_1000X_FD_CAPS 0x8000 /* 1000X FD capable */
+
+#define PHY_TX_POLARITY_MASK 0x0100 /* register 10h bit 8 (polarity bit) */
+#define PHY_TX_NORMAL_POLARITY 0 /* register 10h bit 8 (normal polarity) */
+
+#define AUTO_POLARITY_DISABLE 0x0010 /* register 11h bit 4 */
+ /* (0=enable, 1=disable) */
+
+/* M88E1000 PHY Specific Control Register */
+#define M88E1000_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */
+#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */
+#define M88E1000_PSCR_SQE_TEST 0x0004 /* 1=SQE Test enabled */
+#define M88E1000_PSCR_CLK125_DISABLE 0x0010 /* 1=CLK125 low,
+ * 0=CLK125 toggling
+ */
+#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */
+ /* Manual MDI configuration */
+#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */
+#define M88E1000_PSCR_AUTO_X_1000T 0x0040 /* 1000BASE-T: Auto crossover,
+ * 100BASE-TX/10BASE-T:
+ * MDI Mode
+ */
+#define M88E1000_PSCR_AUTO_X_MODE 0x0060 /* Auto crossover enabled
+ * all speeds.
+ */
+#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE 0x0080
+ /* 1=Enable Extended 10BASE-T distance
+ * (Lower 10BASE-T RX Threshold)
+ * 0=Normal 10BASE-T RX Threshold */
+#define M88E1000_PSCR_MII_5BIT_ENABLE 0x0100
+ /* 1=5-Bit interface in 100BASE-TX
+ * 0=MII interface in 100BASE-TX */
+#define M88E1000_PSCR_SCRAMBLER_DISABLE 0x0200 /* 1=Scrambler disable */
+#define M88E1000_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force link good */
+#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */
+
+#define M88E1000_PSCR_POLARITY_REVERSAL_SHIFT 1
+#define M88E1000_PSCR_AUTO_X_MODE_SHIFT 5
+#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT 7
+
+/* M88E1000 PHY Specific Status Register */
+#define M88E1000_PSSR_JABBER 0x0001 /* 1=Jabber */
+#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */
+#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */
+#define M88E1000_PSSR_CABLE_LENGTH 0x0380 /* 0=<50M;1=50-80M;2=80-110M;
+ * 3=110-140M;4=>140M */
+#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */
+#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */
+#define M88E1000_PSSR_PAGE_RCVD 0x1000 /* 1=Page received */
+#define M88E1000_PSSR_DPLX 0x2000 /* 1=Duplex 0=Half Duplex */
+#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 15:14 */
+#define M88E1000_PSSR_10MBS 0x0000 /* 00=10Mbs */
+#define M88E1000_PSSR_100MBS 0x4000 /* 01=100Mbs */
+#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */
+
+#define M88E1000_PSSR_REV_POLARITY_SHIFT 1
+#define M88E1000_PSSR_MDIX_SHIFT 6
+#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7
+
+/* M88E1000 Extended PHY Specific Control Register */
+#define M88E1000_EPSCR_FIBER_LOOPBACK 0x4000 /* 1=Fiber loopback */
+#define M88E1000_EPSCR_DOWN_NO_IDLE 0x8000 /* 1=Lost lock detect enabled.
+ * Will assert lost lock and bring
+ * link down if idle not seen
+ * within 1ms in 1000BASE-T
+ */
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the master */
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_2X 0x0400
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_3X 0x0800
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_4X 0x0C00
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the slave */
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_DIS 0x0000
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_2X 0x0200
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_3X 0x0300
+#define M88E1000_EPSCR_TX_CLK_2_5 0x0060 /* 2.5 MHz TX_CLK */
+#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */
+#define M88E1000_EPSCR_TX_CLK_0 0x0000 /* NO TX_CLK */
+
+/* Bit definitions for valid PHY IDs. */
+#define M88E1000_E_PHY_ID 0x01410C50
+#define M88E1000_I_PHY_ID 0x01410C30
+#define M88E1011_I_PHY_ID 0x01410C20
+#define M88E1000_12_PHY_ID M88E1000_E_PHY_ID
+#define M88E1000_14_PHY_ID M88E1000_E_PHY_ID
+#define M88E1011_I_REV_4 0x04
+
+/* Miscellaneous PHY bit definitions. */
+#define PHY_PREAMBLE 0xFFFFFFFF
+#define PHY_SOF 0x01
+#define PHY_OP_READ 0x02
+#define PHY_OP_WRITE 0x01
+#define PHY_TURNAROUND 0x02
+#define PHY_PREAMBLE_SIZE 32
+#define MII_CR_SPEED_1000 0x0040
+#define MII_CR_SPEED_100 0x2000
+#define MII_CR_SPEED_10 0x0000
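+/* Editor's note: per the two-bit encoding described for PHY_CTRL above,
+ * 1000 Mb/s is MSB=1/LSB=0 (bit 6 only, hence 0x0040), 100 Mb/s is
+ * MSB=0/LSB=1 (bit 13 only, hence 0x2000), and 10 Mb/s clears both bits.
+ */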
+#define E1000_PHY_ADDRESS 0x01
+#define PHY_AUTO_NEG_TIME 45 /* 4.5 Seconds */
+#define PHY_FORCE_TIME 20 /* 2.0 Seconds */
+#define PHY_REVISION_MASK 0xFFFFFFF0
+#define DEVICE_SPEED_MASK 0x00000300 /* Device Ctrl Reg Speed Mask */
+#define REG4_SPEED_MASK 0x01E0
+#define REG9_SPEED_MASK 0x0300
+#define ADVERTISE_10_HALF 0x0001
+#define ADVERTISE_10_FULL 0x0002
+#define ADVERTISE_100_HALF 0x0004
+#define ADVERTISE_100_FULL 0x0008
+#define ADVERTISE_1000_HALF 0x0010
+#define ADVERTISE_1000_FULL 0x0020
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT 0x002F /* Everything but 1000-Half */
+
+#endif /* _E1000_HW_H_ */
diff --git a/xen/drivers/net/e1000/e1000_main.c b/xen/drivers/net/e1000/e1000_main.c
new file mode 100644
index 0000000000..8afbe394c2
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_main.c
@@ -0,0 +1,2279 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000.h"
+
+/* Change Log
+ *
+ * 4.4.19 11/27/02
+ * o Feature: Added user-settable knob for interrupt throttle rate (ITR).
+ * o Cleanup: removed large static array allocations.
+ * o Cleanup: C99 struct initializer format.
+ * o Bug fix: restore VLAN settings when interface is brought up.
+ * o Bug fix: return cleanly in probe if error in detecting MAC type.
+ * o Bug fix: Wake up on magic packet by default only if enabled in eeprom.
+ * o Bug fix: Validate MAC address in set_mac.
+ * o Bug fix: Throw away zero-length Tx skbs.
+ * o Bug fix: Make ethtool EEPROM accesses work on older versions of ethtool.
+ *
+ * 4.4.12 10/15/02
+ * o Clean up: use members of pci_device rather than direct calls to
+ * pci_read_config_word.
+ * o Bug fix: changed default flow control settings.
+ * o Clean up: ethtool file now has an inclusive list for adapters in the
+ * Wake-On-LAN capabilities instead of an exclusive list.
+ * o Bug fix: miscellaneous WoL bug fixes.
+ * o Added software interrupt for clearing rx ring
+ * o Bug fix: easier to undo "forcing" of 1000/fd using ethtool.
+ * o Now setting netdev->mem_end in e1000_probe.
+ * o Clean up: Moved tx_timeout from interrupt context to process context
+ * using schedule_task.
+ *
+ * 4.3.15 8/9/02
+ */
+
+char e1000_driver_name[] = "e1000";
+char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
+char e1000_driver_version[] = "4.4.19-k2";
+char e1000_copyright[] = "Copyright (c) 1999-2002 Intel Corporation.";
+
+/* e1000_pci_tbl - PCI Device ID Table
+ *
+ * Private driver_data field (last one) stores an index into e1000_strings
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ * Class, Class Mask, String Index }
+ */
+static struct pci_device_id e1000_pci_tbl[] __devinitdata = {
+ /* Intel(R) PRO/1000 Network Connection */
+ {0x8086, 0x1000, 0x8086, 0x1000, 0, 0, 0},
+ {0x8086, 0x1001, 0x8086, 0x1003, 0, 0, 0},
+ {0x8086, 0x1004, 0x8086, 0x1004, 0, 0, 0},
+ {0x8086, 0x1008, 0x8086, 0x1107, 0, 0, 0},
+ {0x8086, 0x1009, 0x8086, 0x1109, 0, 0, 0},
+ {0x8086, 0x100C, 0x8086, 0x1112, 0, 0, 0},
+ {0x8086, 0x100E, 0x8086, 0x001E, 0, 0, 0},
+ /* Compaq Gigabit Ethernet Server Adapter */
+ {0x8086, 0x1000, 0x0E11, PCI_ANY_ID, 0, 0, 1},
+ {0x8086, 0x1001, 0x0E11, PCI_ANY_ID, 0, 0, 1},
+ {0x8086, 0x1004, 0x0E11, PCI_ANY_ID, 0, 0, 1},
+ /* IBM Mobile, Desktop & Server Adapters */
+ {0x8086, 0x1000, 0x1014, PCI_ANY_ID, 0, 0, 2},
+ {0x8086, 0x1001, 0x1014, PCI_ANY_ID, 0, 0, 2},
+ {0x8086, 0x1004, 0x1014, PCI_ANY_ID, 0, 0, 2},
+ /* Generic */
+ {0x8086, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x100C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x100D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x100E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x100F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1011, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1010, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1012, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1016, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1017, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x101E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ /* required last entry */
+ {0,}
+};
+
+MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
+
+static char *e1000_strings[] = {
+ "Intel(R) PRO/1000 Network Connection",
+ "Compaq Gigabit Ethernet Server Adapter",
+ "IBM Mobile, Desktop & Server Adapters"
+};
+
+/* Local Function Prototypes */
+
+int e1000_up(struct e1000_adapter *adapter);
+void e1000_down(struct e1000_adapter *adapter);
+void e1000_reset(struct e1000_adapter *adapter);
+
+static int e1000_init_module(void);
+static void e1000_exit_module(void);
+static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+static void e1000_remove(struct pci_dev *pdev);
+static int e1000_sw_init(struct e1000_adapter *adapter);
+static int e1000_open(struct net_device *netdev);
+static int e1000_close(struct net_device *netdev);
+static int e1000_setup_tx_resources(struct e1000_adapter *adapter);
+static int e1000_setup_rx_resources(struct e1000_adapter *adapter);
+static void e1000_configure_tx(struct e1000_adapter *adapter);
+static void e1000_configure_rx(struct e1000_adapter *adapter);
+static void e1000_setup_rctl(struct e1000_adapter *adapter);
+static void e1000_clean_tx_ring(struct e1000_adapter *adapter);
+static void e1000_clean_rx_ring(struct e1000_adapter *adapter);
+static void e1000_free_tx_resources(struct e1000_adapter *adapter);
+static void e1000_free_rx_resources(struct e1000_adapter *adapter);
+static void e1000_set_multi(struct net_device *netdev);
+static void e1000_update_phy_info(unsigned long data);
+static void e1000_watchdog(unsigned long data);
+static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static struct net_device_stats * e1000_get_stats(struct net_device *netdev);
+static int e1000_change_mtu(struct net_device *netdev, int new_mtu);
+static int e1000_set_mac(struct net_device *netdev, void *p);
+static void e1000_update_stats(struct e1000_adapter *adapter);
+static inline void e1000_irq_disable(struct e1000_adapter *adapter);
+static inline void e1000_irq_enable(struct e1000_adapter *adapter);
+static void e1000_intr(int irq, void *data, struct pt_regs *regs);
+static void e1000_clean_tx_irq(struct e1000_adapter *adapter);
+static void e1000_clean_rx_irq(struct e1000_adapter *adapter);
+static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter);
+static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
+static void e1000_enter_82542_rst(struct e1000_adapter *adapter);
+static void e1000_leave_82542_rst(struct e1000_adapter *adapter);
+static inline void e1000_rx_checksum(struct e1000_adapter *adapter,
+ struct e1000_rx_desc *rx_desc,
+ struct sk_buff *skb);
+static void e1000_tx_timeout(struct net_device *dev);
+static void e1000_tx_timeout_task(struct net_device *dev);
+
+static void e1000_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp);
+static void e1000_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid);
+static void e1000_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid);
+static void e1000_restore_vlan(struct e1000_adapter *adapter);
+
+static int e1000_notify_reboot(struct notifier_block *, unsigned long event, void *ptr);
+static int e1000_suspend(struct pci_dev *pdev, uint32_t state);
+#ifdef CONFIG_PM
+static int e1000_resume(struct pci_dev *pdev);
+#endif
+
+struct notifier_block e1000_notifier_reboot = {
+ .notifier_call = e1000_notify_reboot,
+ .next = NULL,
+ .priority = 0
+};
+
+/* Exported from other modules */
+
+extern void e1000_check_options(struct e1000_adapter *adapter);
+extern int e1000_ethtool_ioctl(struct net_device *netdev, struct ifreq *ifr);
+
+static struct pci_driver e1000_driver = {
+ .name = e1000_driver_name,
+ .id_table = e1000_pci_tbl,
+ .probe = e1000_probe,
+ .remove = __devexit_p(e1000_remove),
+ /* Power Management Hooks */
+#ifdef CONFIG_PM
+ .suspend = e1000_suspend,
+ .resume = e1000_resume
+#endif
+};
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
+MODULE_LICENSE("GPL");
+
+/**
+ * e1000_init_module - Driver Registration Routine
+ *
+ * e1000_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+
+static int __init
+e1000_init_module(void)
+{
+ int ret;
+
+#if 0 /* Avoid disconcerting noise. */
+ printk(KERN_INFO "%s - version %s\n",
+ e1000_driver_string, e1000_driver_version);
+
+ printk(KERN_INFO "%s\n", e1000_copyright);
+#endif
+
+ ret = pci_module_init(&e1000_driver);
+// if(ret >= 0)
+// register_reboot_notifier(&e1000_notifier_reboot);
+ return ret;
+}
+
+module_init(e1000_init_module);
+
+/**
+ * e1000_exit_module - Driver Exit Cleanup Routine
+ *
+ * e1000_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+
+static void __exit
+e1000_exit_module(void)
+{
+// unregister_reboot_notifier(&e1000_notifier_reboot);
+ pci_unregister_driver(&e1000_driver);
+}
+
+module_exit(e1000_exit_module);
+
+
+int
+e1000_up(struct e1000_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ if(request_irq(netdev->irq, &e1000_intr, SA_SHIRQ | SA_SAMPLE_RANDOM,
+ netdev->name, netdev))
+ return -1;
+
+ /* hardware has been reset, we need to reload some things */
+
+ e1000_set_multi(netdev);
+ e1000_restore_vlan(adapter);
+
+ e1000_configure_tx(adapter);
+ e1000_setup_rctl(adapter);
+ e1000_configure_rx(adapter);
+ e1000_alloc_rx_buffers(adapter);
+
+ mod_timer(&adapter->watchdog_timer, jiffies);
+ e1000_irq_enable(adapter);
+
+ return 0;
+}
+
+void
+e1000_down(struct e1000_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ e1000_irq_disable(adapter);
+ free_irq(netdev->irq, netdev);
+ del_timer_sync(&adapter->watchdog_timer);
+ del_timer_sync(&adapter->phy_info_timer);
+ adapter->link_speed = 0;
+ adapter->link_duplex = 0;
+ netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
+
+ e1000_reset(adapter);
+ e1000_clean_tx_ring(adapter);
+ e1000_clean_rx_ring(adapter);
+}
+
+void
+e1000_reset(struct e1000_adapter *adapter)
+{
+ /* Repartition the PBA for MTUs greater than 9k; a CTRL.RST is
+ * required for the change to take effect.
+ */
+
+ if(adapter->rx_buffer_len > E1000_RXBUFFER_8192)
+ E1000_WRITE_REG(&adapter->hw, PBA, E1000_JUMBO_PBA);
+ else
+ E1000_WRITE_REG(&adapter->hw, PBA, E1000_DEFAULT_PBA);
+
+ adapter->hw.fc = adapter->hw.original_fc;
+ e1000_reset_hw(&adapter->hw);
+ if(adapter->hw.mac_type >= e1000_82544)
+ E1000_WRITE_REG(&adapter->hw, WUC, 0);
+ e1000_init_hw(&adapter->hw);
+ e1000_reset_adaptive(&adapter->hw);
+ e1000_phy_get_info(&adapter->hw, &adapter->phy_info);
+}
+
+/**
+ * e1000_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in e1000_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * e1000_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+
+static int __devinit
+e1000_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct net_device *netdev;
+ struct e1000_adapter *adapter;
+ static int cards_found = 0;
+ unsigned long mmio_start;
+ int mmio_len;
+ int pci_using_dac;
+ int i;
+ uint16_t eeprom_data;
+
+ if((i = pci_enable_device(pdev)))
+ return i;
+
+ if(!(i = pci_set_dma_mask(pdev, PCI_DMA_64BIT))) {
+ pci_using_dac = 1;
+ } else {
+ if((i = pci_set_dma_mask(pdev, PCI_DMA_32BIT))) {
+ E1000_ERR("No usable DMA configuration, aborting\n");
+ return i;
+ }
+ pci_using_dac = 0;
+ }
+
+ if((i = pci_request_regions(pdev, e1000_driver_name)))
+ return i;
+
+ pci_set_master(pdev);
+
+ netdev = alloc_etherdev(sizeof(struct e1000_adapter));
+ if(!netdev)
+ goto err_alloc_etherdev;
+
+ SET_MODULE_OWNER(netdev);
+
+ pci_set_drvdata(pdev, netdev);
+ adapter = netdev->priv;
+ adapter->netdev = netdev;
+ adapter->pdev = pdev;
+ adapter->hw.back = adapter;
+
+ mmio_start = pci_resource_start(pdev, BAR_0);
+ mmio_len = pci_resource_len(pdev, BAR_0);
+
+ adapter->hw.hw_addr = ioremap(mmio_start, mmio_len);
+ if(!adapter->hw.hw_addr)
+ goto err_ioremap;
+
+ for(i = BAR_1; i <= BAR_5; i++) {
+ if(pci_resource_len(pdev, i) == 0)
+ continue;
+ if(pci_resource_flags(pdev, i) & IORESOURCE_IO) {
+ adapter->hw.io_base = pci_resource_start(pdev, i);
+ break;
+ }
+ }
+
+ netdev->open = &e1000_open;
+ netdev->stop = &e1000_close;
+ netdev->hard_start_xmit = &e1000_xmit_frame;
+ netdev->get_stats = &e1000_get_stats;
+ netdev->set_multicast_list = &e1000_set_multi;
+ netdev->set_mac_address = &e1000_set_mac;
+ netdev->change_mtu = &e1000_change_mtu;
+ netdev->do_ioctl = &e1000_ioctl;
+ netdev->tx_timeout = &e1000_tx_timeout;
+ netdev->watchdog_timeo = HZ;
+ netdev->vlan_rx_register = e1000_vlan_rx_register;
+ netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid;
+ netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid;
+
+ netdev->irq = pdev->irq;
+ netdev->mem_start = mmio_start;
+ netdev->mem_end = mmio_start + mmio_len;
+ netdev->base_addr = adapter->hw.io_base;
+
+ adapter->bd_number = cards_found;
+ adapter->id_string = e1000_strings[ent->driver_data];
+
+ /* setup the private structure */
+
+ if(e1000_sw_init(adapter))
+ goto err_sw_init;
+
+ if(adapter->hw.mac_type >= e1000_82543) {
+ netdev->features = NETIF_F_SG |
+ NETIF_F_HW_CSUM |
+ NETIF_F_HW_VLAN_TX |
+ NETIF_F_HW_VLAN_RX |
+ NETIF_F_HW_VLAN_FILTER;
+ } else {
+ netdev->features = NETIF_F_SG;
+ }
+
+ if(pci_using_dac)
+ netdev->features |= NETIF_F_HIGHDMA;
+
+ /* make sure the EEPROM is good */
+
+ if(e1000_validate_eeprom_checksum(&adapter->hw) < 0) {
+ printk(KERN_ERR "The EEPROM Checksum Is Not Valid\n");
+ goto err_eeprom;
+ }
+
+ /* copy the MAC address out of the EEPROM */
+
+ e1000_read_mac_addr(&adapter->hw);
+ memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len);
+
+ if(!is_valid_ether_addr(netdev->dev_addr))
+ goto err_eeprom;
+
+ e1000_read_part_num(&adapter->hw, &(adapter->part_num));
+
+ e1000_get_bus_info(&adapter->hw);
+
+ if((adapter->hw.mac_type == e1000_82544) &&
+    (adapter->hw.bus_type == e1000_bus_type_pcix))
+ 	adapter->max_data_per_txd = 4096;
+ else
+ 	adapter->max_data_per_txd = MAX_JUMBO_FRAME_SIZE;
+
+ init_timer(&adapter->watchdog_timer);
+ adapter->watchdog_timer.function = &e1000_watchdog;
+ adapter->watchdog_timer.data = (unsigned long) adapter;
+
+ init_timer(&adapter->phy_info_timer);
+ adapter->phy_info_timer.function = &e1000_update_phy_info;
+ adapter->phy_info_timer.data = (unsigned long) adapter;
+
+ INIT_TQUEUE(&adapter->tx_timeout_task,
+ (void (*)(void *))e1000_tx_timeout_task, netdev);
+
+ register_netdev(netdev);
+ memcpy(adapter->ifname, netdev->name, IFNAMSIZ);
+ adapter->ifname[IFNAMSIZ-1] = 0;
+
+ /* we're going to reset, so assume we have no link for now */
+
+ netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
+
+ printk(KERN_INFO "%s: %s\n", netdev->name, adapter->id_string);
+ e1000_check_options(adapter);
+ /* Initial Wake on LAN setting
+ * If APM wake is enabled in the EEPROM,
+ * enable the ACPI Magic Packet filter
+ */
+
+ e1000_read_eeprom(&adapter->hw, EEPROM_INIT_CONTROL2_REG, &eeprom_data);
+ if((adapter->hw.mac_type >= e1000_82544) &&
+ (eeprom_data & E1000_EEPROM_APME))
+ adapter->wol |= E1000_WUFC_MAG;
+
+ /* reset the hardware with the new settings */
+
+ e1000_reset(adapter);
+ cards_found++;
+ return 0;
+
+err_sw_init:
+err_eeprom:
+ iounmap(adapter->hw.hw_addr);
+err_ioremap:
+ pci_release_regions(pdev);
+ kfree(netdev);
+err_alloc_etherdev:
+ return -ENOMEM;
+}
+
+/**
+ * e1000_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * e1000_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+
+static void __devexit
+e1000_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t manc;
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ manc = E1000_READ_REG(&adapter->hw, MANC);
+ if(manc & E1000_MANC_SMBUS_EN) {
+ manc |= E1000_MANC_ARP_EN;
+ E1000_WRITE_REG(&adapter->hw, MANC, manc);
+ }
+ }
+
+ unregister_netdev(netdev);
+
+ e1000_phy_hw_reset(&adapter->hw);
+
+ iounmap(adapter->hw.hw_addr);
+ pci_release_regions(pdev);
+
+ kfree(netdev);
+}
+
+/**
+ * e1000_sw_init - Initialize general software structures (struct e1000_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * e1000_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+
+static int __devinit
+e1000_sw_init(struct e1000_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+
+ /* PCI config space info */
+
+ hw->vendor_id = pdev->vendor;
+ hw->device_id = pdev->device;
+ hw->subsystem_vendor_id = pdev->subsystem_vendor;
+ hw->subsystem_id = pdev->subsystem_device;
+
+ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+
+ pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word);
+
+ adapter->rx_buffer_len = E1000_RXBUFFER_2048;
+ hw->max_frame_size = netdev->mtu +
+ ENET_HEADER_SIZE + ETHERNET_FCS_SIZE;
+ hw->min_frame_size = MINIMUM_ETHERNET_FRAME_SIZE;
+
+ /* identify the MAC */
+
+ if (e1000_set_mac_type(hw)) {
+ E1000_ERR("Unknown MAC Type\n");
+ return -1;
+ }
+
+ /* flow control settings */
+
+ hw->fc_high_water = E1000_FC_HIGH_THRESH;
+ hw->fc_low_water = E1000_FC_LOW_THRESH;
+ hw->fc_pause_time = E1000_FC_PAUSE_TIME;
+ hw->fc_send_xon = 1;
+
+ /* Media type - copper or fiber */
+
+ if(hw->mac_type >= e1000_82543) {
+ uint32_t status = E1000_READ_REG(hw, STATUS);
+
+ if(status & E1000_STATUS_TBIMODE)
+ hw->media_type = e1000_media_type_fiber;
+ else
+ hw->media_type = e1000_media_type_copper;
+ } else {
+ hw->media_type = e1000_media_type_fiber;
+ }
+
+ if(hw->mac_type < e1000_82543)
+ hw->report_tx_early = 0;
+ else
+ hw->report_tx_early = 1;
+
+ hw->wait_autoneg_complete = FALSE;
+ hw->tbi_compatibility_en = TRUE;
+ hw->adaptive_ifs = TRUE;
+
+ /* Copper options */
+
+ if(hw->media_type == e1000_media_type_copper) {
+ hw->mdix = AUTO_ALL_MODES;
+ hw->disable_polarity_correction = FALSE;
+ }
+
+ atomic_set(&adapter->irq_sem, 1);
+ spin_lock_init(&adapter->stats_lock);
+
+ return 0;
+}
+
+/**
+ * e1000_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+
+static int
+e1000_open(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+
+ /* allocate transmit descriptors */
+
+ if(e1000_setup_tx_resources(adapter))
+ goto err_setup_tx;
+
+ /* allocate receive descriptors */
+
+ if(e1000_setup_rx_resources(adapter))
+ goto err_setup_rx;
+
+ if(e1000_up(adapter))
+ goto err_up;
+
+ return 0;
+
+err_up:
+ e1000_free_rx_resources(adapter);
+err_setup_rx:
+ e1000_free_tx_resources(adapter);
+err_setup_tx:
+ e1000_reset(adapter);
+
+ return -EBUSY;
+}
+
+/**
+ * e1000_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS. The hardware is still under the driver's control, but
+ * needs to be disabled. A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+
+static int
+e1000_close(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+
+ e1000_down(adapter);
+
+ e1000_free_tx_resources(adapter);
+ e1000_free_rx_resources(adapter);
+
+ return 0;
+}
+
+/**
+ * e1000_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+
+static int
+e1000_setup_tx_resources(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *txdr = &adapter->tx_ring;
+ struct pci_dev *pdev = adapter->pdev;
+ int size;
+
+ size = sizeof(struct e1000_buffer) * txdr->count;
+ txdr->buffer_info = kmalloc(size, GFP_KERNEL);
+ if(!txdr->buffer_info) {
+ return -ENOMEM;
+ }
+ memset(txdr->buffer_info, 0, size);
+
+ /* round up to nearest 4K */
+
+ txdr->size = txdr->count * sizeof(struct e1000_tx_desc);
+ E1000_ROUNDUP(txdr->size, 4096);
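+ /* Editor's note: E1000_ROUNDUP (defined elsewhere in the driver)
+ * rounds its first argument up to a multiple of the second, e.g.
+ * a 4112 byte ring would become 8192 bytes here. */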
+
+ txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma);
+ if(!txdr->desc) {
+ kfree(txdr->buffer_info);
+ return -ENOMEM;
+ }
+ memset(txdr->desc, 0, txdr->size);
+
+ txdr->next_to_use = 0;
+ txdr->next_to_clean = 0;
+
+ return 0;
+}
+
+/**
+ * e1000_configure_tx - Configure 8254x Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+
+static void
+e1000_configure_tx(struct e1000_adapter *adapter)
+{
+ uint64_t tdba = adapter->tx_ring.dma;
+ uint32_t tdlen = adapter->tx_ring.count * sizeof(struct e1000_tx_desc);
+ uint32_t tctl, tipg;
+
+ E1000_WRITE_REG(&adapter->hw, TDBAL, (tdba & 0x00000000ffffffffULL));
+ E1000_WRITE_REG(&adapter->hw, TDBAH, (tdba >> 32));
+
+ E1000_WRITE_REG(&adapter->hw, TDLEN, tdlen);
+
+ /* Setup the HW Tx Head and Tail descriptor pointers */
+
+ E1000_WRITE_REG(&adapter->hw, TDH, 0);
+ E1000_WRITE_REG(&adapter->hw, TDT, 0);
+
+ /* Set the default values for the Tx Inter Packet Gap timer */
+
+ switch (adapter->hw.mac_type) {
+ case e1000_82542_rev2_0:
+ case e1000_82542_rev2_1:
+ tipg = DEFAULT_82542_TIPG_IPGT;
+ tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
+ tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
+ break;
+ default:
+ if(adapter->hw.media_type == e1000_media_type_fiber)
+ tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
+ else
+ tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
+ tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
+ tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
+ }
+ E1000_WRITE_REG(&adapter->hw, TIPG, tipg);
+
+ /* Set the Tx Interrupt Delay register */
+
+ E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay);
+ if(adapter->hw.mac_type >= e1000_82540)
+ E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay);
+
+ /* Program the Transmit Control Register */
+
+ tctl = E1000_READ_REG(&adapter->hw, TCTL);
+
+ tctl &= ~E1000_TCTL_CT;
+ tctl |= E1000_TCTL_EN | E1000_TCTL_PSP |
+ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
+
+ E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
+
+ e1000_config_collision_dist(&adapter->hw);
+
+ /* Setup Transmit Descriptor Settings for this adapter */
+ adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_IDE;
+
+ if(adapter->hw.report_tx_early == 1)
+ adapter->txd_cmd |= E1000_TXD_CMD_RS;
+ else
+ adapter->txd_cmd |= E1000_TXD_CMD_RPS;
+}
+
+/**
+ * e1000_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+
+static int
+e1000_setup_rx_resources(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *rxdr = &adapter->rx_ring;
+ struct pci_dev *pdev = adapter->pdev;
+ int size;
+
+ size = sizeof(struct e1000_buffer) * rxdr->count;
+ rxdr->buffer_info = kmalloc(size, GFP_KERNEL);
+ if(!rxdr->buffer_info) {
+ return -ENOMEM;
+ }
+ memset(rxdr->buffer_info, 0, size);
+
+ /* Round up to nearest 4K */
+
+ rxdr->size = rxdr->count * sizeof(struct e1000_rx_desc);
+ E1000_ROUNDUP(rxdr->size, 4096);
+
+ rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma);
+
+ if(!rxdr->desc) {
+ kfree(rxdr->buffer_info);
+ return -ENOMEM;
+ }
+ memset(rxdr->desc, 0, rxdr->size);
+
+ rxdr->next_to_clean = 0;
+ rxdr->next_to_use = 0;
+
+ return 0;
+}
+
+/**
+ * e1000_setup_rctl - configure the receive control register
+ * @adapter: Board private structure
+ **/
+
+static void
+e1000_setup_rctl(struct e1000_adapter *adapter)
+{
+ uint32_t rctl;
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+
+ rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+
+ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
+ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
+ (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+ if(adapter->hw.tbi_compatibility_on == 1)
+ rctl |= E1000_RCTL_SBP;
+ else
+ rctl &= ~E1000_RCTL_SBP;
+
+ rctl &= ~(E1000_RCTL_SZ_4096);
+ switch (adapter->rx_buffer_len) {
+ case E1000_RXBUFFER_2048:
+ default:
+ rctl |= E1000_RCTL_SZ_2048;
+ rctl &= ~(E1000_RCTL_BSEX | E1000_RCTL_LPE);
+ break;
+ case E1000_RXBUFFER_4096:
+ rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+ break;
+ case E1000_RXBUFFER_8192:
+ rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+ break;
+ case E1000_RXBUFFER_16384:
+ rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+ break;
+ }
+
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+}
+
+/**
+ * e1000_configure_rx - Configure 8254x Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+
+static void
+e1000_configure_rx(struct e1000_adapter *adapter)
+{
+ uint64_t rdba = adapter->rx_ring.dma;
+ uint32_t rdlen = adapter->rx_ring.count * sizeof(struct e1000_rx_desc);
+ uint32_t rctl;
+ uint32_t rxcsum;
+
+ /* make sure receives are disabled while setting up the descriptors */
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl & ~E1000_RCTL_EN);
+
+ /* set the Receive Delay Timer Register */
+
+ E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay);
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay);
+
+ /* Set the interrupt throttling rate. Value is calculated
+ * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
+#define MAX_INTS_PER_SEC 8000
+#define DEFAULT_ITR (1000000000 / (MAX_INTS_PER_SEC * 256))
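+ /* Editor's note: the ITR register counts in units of 256 ns, so
+ * DEFAULT_ITR = 10^9 / (8000 * 256) = 488 units, i.e. one interrupt
+ * at most every ~125 us, capping the rate near 8000 ints/s. */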
+ E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
+ }
+
+ /* Setup the Base and Length of the Rx Descriptor Ring */
+
+ E1000_WRITE_REG(&adapter->hw, RDBAL, (rdba & 0x00000000ffffffffULL));
+ E1000_WRITE_REG(&adapter->hw, RDBAH, (rdba >> 32));
+
+ E1000_WRITE_REG(&adapter->hw, RDLEN, rdlen);
+
+ /* Setup the HW Rx Head and Tail Descriptor Pointers */
+ E1000_WRITE_REG(&adapter->hw, RDH, 0);
+ E1000_WRITE_REG(&adapter->hw, RDT, 0);
+
+ /* Enable 82543 Receive Checksum Offload for TCP and UDP */
+ if((adapter->hw.mac_type >= e1000_82543) &&
+ (adapter->rx_csum == TRUE)) {
+ rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
+ rxcsum |= E1000_RXCSUM_TUOFL;
+ E1000_WRITE_REG(&adapter->hw, RXCSUM, rxcsum);
+ }
+
+ /* Enable Receives */
+
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+}
+
+/**
+ * e1000_free_tx_resources - Free Tx Resources
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+
+static void
+e1000_free_tx_resources(struct e1000_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+
+ e1000_clean_tx_ring(adapter);
+
+ kfree(adapter->tx_ring.buffer_info);
+ adapter->tx_ring.buffer_info = NULL;
+
+ pci_free_consistent(pdev, adapter->tx_ring.size,
+ adapter->tx_ring.desc, adapter->tx_ring.dma);
+
+ adapter->tx_ring.desc = NULL;
+}
+
+/**
+ * e1000_clean_tx_ring - Free Tx Buffers
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_tx_ring(struct e1000_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ unsigned long size;
+ int i;
+
+ /* Free all the Tx ring sk_buffs */
+
+ for(i = 0; i < adapter->tx_ring.count; i++) {
+ if(adapter->tx_ring.buffer_info[i].skb) {
+
+ pci_unmap_page(pdev,
+ adapter->tx_ring.buffer_info[i].dma,
+ adapter->tx_ring.buffer_info[i].length,
+ PCI_DMA_TODEVICE);
+
+ dev_kfree_skb(adapter->tx_ring.buffer_info[i].skb);
+
+ adapter->tx_ring.buffer_info[i].skb = NULL;
+ }
+ }
+
+ size = sizeof(struct e1000_buffer) * adapter->tx_ring.count;
+ memset(adapter->tx_ring.buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+
+ memset(adapter->tx_ring.desc, 0, adapter->tx_ring.size);
+
+ adapter->tx_ring.next_to_use = 0;
+ adapter->tx_ring.next_to_clean = 0;
+
+ E1000_WRITE_REG(&adapter->hw, TDH, 0);
+ E1000_WRITE_REG(&adapter->hw, TDT, 0);
+}
+
+/**
+ * e1000_free_rx_resources - Free Rx Resources
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+
+static void
+e1000_free_rx_resources(struct e1000_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+
+ e1000_clean_rx_ring(adapter);
+
+ kfree(adapter->rx_ring.buffer_info);
+ adapter->rx_ring.buffer_info = NULL;
+
+ pci_free_consistent(pdev, adapter->rx_ring.size,
+ adapter->rx_ring.desc, adapter->rx_ring.dma);
+
+ adapter->rx_ring.desc = NULL;
+}
+
+/**
+ * e1000_clean_rx_ring - Free Rx Buffers
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_rx_ring(struct e1000_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ unsigned long size;
+ int i;
+
+ /* Free all the Rx ring sk_buffs */
+
+ for(i = 0; i < adapter->rx_ring.count; i++) {
+ if(adapter->rx_ring.buffer_info[i].skb) {
+
+ pci_unmap_single(pdev,
+ adapter->rx_ring.buffer_info[i].dma,
+ adapter->rx_ring.buffer_info[i].length,
+ PCI_DMA_FROMDEVICE);
+
+ dev_kfree_skb(adapter->rx_ring.buffer_info[i].skb);
+
+ adapter->rx_ring.buffer_info[i].skb = NULL;
+ }
+ }
+
+ size = sizeof(struct e1000_buffer) * adapter->rx_ring.count;
+ memset(adapter->rx_ring.buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+
+ memset(adapter->rx_ring.desc, 0, adapter->rx_ring.size);
+
+ adapter->rx_ring.next_to_clean = 0;
+ adapter->rx_ring.next_to_use = 0;
+
+ E1000_WRITE_REG(&adapter->hw, RDH, 0);
+ E1000_WRITE_REG(&adapter->hw, RDT, 0);
+}
+
+/* The 82542 2.0 (revision 2) needs to have the receive unit in reset
+ * and memory write and invalidate disabled for certain operations
+ */
+static void
+e1000_enter_82542_rst(struct e1000_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ uint32_t rctl;
+
+ e1000_pci_clear_mwi(&adapter->hw);
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl |= E1000_RCTL_RST;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ E1000_WRITE_FLUSH(&adapter->hw);
+ mdelay(5);
+
+ if(netif_running(netdev))
+ e1000_clean_rx_ring(adapter);
+}
+
+static void
+e1000_leave_82542_rst(struct e1000_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ uint32_t rctl;
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl &= ~E1000_RCTL_RST;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ E1000_WRITE_FLUSH(&adapter->hw);
+ mdelay(5);
+
+ if(adapter->hw.pci_cmd_word & PCI_COMMAND_INVALIDATE)
+ e1000_pci_set_mwi(&adapter->hw);
+
+ if(netif_running(netdev)) {
+ e1000_configure_rx(adapter);
+ e1000_alloc_rx_buffers(adapter);
+ }
+}
+
+/**
+ * e1000_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+
+static int
+e1000_set_mac(struct net_device *netdev, void *p)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ struct sockaddr *addr = p;
+
+ if(!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ /* 82542 2.0 needs to be in reset to write receive address registers */
+
+ if(adapter->hw.mac_type == e1000_82542_rev2_0)
+ e1000_enter_82542_rst(adapter);
+
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+ memcpy(adapter->hw.mac_addr, addr->sa_data, netdev->addr_len);
+
+ e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, 0);
+
+ if(adapter->hw.mac_type == e1000_82542_rev2_0)
+ e1000_leave_82542_rst(adapter);
+
+ return 0;
+}
+
+/**
+ * e1000_set_multi - Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_multi entry point is called whenever the multicast address
+ * list or the network interface flags are updated. This routine is
+ * responsible for configuring the hardware for proper multicast,
+ * promiscuous mode, and all-multi behavior.
+ **/
+
+static void
+e1000_set_multi(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ struct e1000_hw *hw = &adapter->hw;
+ struct dev_mc_list *mc_ptr;
+ uint32_t rctl;
+ uint32_t hash_value;
+ int i;
+
+ /* Check for Promiscuous and All Multicast modes */
+
+ rctl = E1000_READ_REG(hw, RCTL);
+
+ if(netdev->flags & IFF_PROMISC) {
+ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
+ } else if(netdev->flags & IFF_ALLMULTI) {
+ rctl |= E1000_RCTL_MPE;
+ rctl &= ~E1000_RCTL_UPE;
+ } else {
+ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE);
+ }
+
+ E1000_WRITE_REG(hw, RCTL, rctl);
+
+ /* 82542 2.0 needs to be in reset to write receive address registers */
+
+ if(hw->mac_type == e1000_82542_rev2_0)
+ e1000_enter_82542_rst(adapter);
+
+ /* load the first 15 multicast addresses into the exact filters 1-15;
+ * RAR 0 is used for the station MAC address. If there are fewer than
+ * 15 addresses, clear the remaining filters.
+ */
+ mc_ptr = netdev->mc_list;
+
+ for(i = 1; i < E1000_RAR_ENTRIES; i++) {
+ if(mc_ptr) {
+ e1000_rar_set(hw, mc_ptr->dmi_addr, i);
+ mc_ptr = mc_ptr->next;
+ } else {
+ E1000_WRITE_REG_ARRAY(hw, RA, i << 1, 0);
+ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1) + 1, 0);
+ }
+ }
+
+ /* clear the old settings from the multicast hash table */
+
+ for(i = 0; i < E1000_NUM_MTA_REGISTERS; i++)
+ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+
+ /* load any remaining addresses into the hash table */
+
+ for(; mc_ptr; mc_ptr = mc_ptr->next) {
+ hash_value = e1000_hash_mc_addr(hw, mc_ptr->dmi_addr);
+ e1000_mta_set(hw, hash_value);
+ }
+
+ if(hw->mac_type == e1000_82542_rev2_0)
+ e1000_leave_82542_rst(adapter);
+}
+
+
+/* We need to wait a few seconds after link up before we can get diagnostic information from the PHY */
+
+static void
+e1000_update_phy_info(unsigned long data)
+{
+ struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+ e1000_phy_get_info(&adapter->hw, &adapter->phy_info);
+}
+
+/**
+ * e1000_watchdog - Timer Call-back
+ * @data: pointer to our adapter struct cast into an unsigned long
+ **/
+
+static void
+e1000_watchdog(unsigned long data)
+{
+ struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_desc_ring *txdr = &adapter->tx_ring;
+ int i;
+
+ e1000_check_for_link(&adapter->hw);
+
+ if(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
+ if(!netif_carrier_ok(netdev)) {
+ e1000_get_speed_and_duplex(&adapter->hw,
+ &adapter->link_speed,
+ &adapter->link_duplex);
+
+ printk(KERN_INFO
+ "e1000: %s NIC Link is Up %d Mbps %s\n",
+ netdev->name, adapter->link_speed,
+ adapter->link_duplex == FULL_DUPLEX ?
+ "Full Duplex" : "Half Duplex");
+
+ netif_carrier_on(netdev);
+ netif_wake_queue(netdev);
+ mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ);
+ }
+ } else {
+ if(netif_carrier_ok(netdev)) {
+ adapter->link_speed = 0;
+ adapter->link_duplex = 0;
+ printk(KERN_INFO
+ "e1000: %s NIC Link is Down\n",
+ netdev->name);
+ netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
+ mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ);
+ }
+ }
+
+ e1000_update_stats(adapter);
+ e1000_update_adaptive(&adapter->hw);
+
+
+ /* Cause software interrupt to ensure rx ring is cleaned */
+ E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_RXDMT0);
+
+ /* Early detection of hung controller */
+ i = txdr->next_to_clean;
+ if(txdr->buffer_info[i].dma &&
+ time_after(jiffies, txdr->buffer_info[i].time_stamp + HZ) &&
+ !(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF))
+ netif_stop_queue(netdev);
+
+ /* Reset the timer */
+ mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ);
+}
+
+#define E1000_TX_FLAGS_CSUM 0x00000001
+#define E1000_TX_FLAGS_VLAN 0x00000002
+#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000
+#define E1000_TX_FLAGS_VLAN_SHIFT 16
+
+static inline boolean_t
+e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb)
+{
+ struct e1000_context_desc *context_desc;
+ int i;
+ uint8_t css, cso;
+
+ if(skb->ip_summed == CHECKSUM_HW) {
+ css = skb->h.raw - skb->data;
+ cso = (skb->h.raw + skb->csum) - skb->data;
+
+ i = adapter->tx_ring.next_to_use;
+ context_desc = E1000_CONTEXT_DESC(adapter->tx_ring, i);
+
+ context_desc->upper_setup.tcp_fields.tucss = css;
+ context_desc->upper_setup.tcp_fields.tucso = cso;
+ context_desc->upper_setup.tcp_fields.tucse = 0;
+ context_desc->tcp_seg_setup.data = 0;
+ context_desc->cmd_and_length =
+ cpu_to_le32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);
+
+ i = (i + 1) % adapter->tx_ring.count;
+ adapter->tx_ring.next_to_use = i;
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static inline int
+e1000_tx_map(struct e1000_adapter *adapter, struct sk_buff *skb)
+{
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ int len, offset, size, count, i;
+
+ int f;
+ len = skb->len - skb->data_len;
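+ /* Editor's note: start one slot behind next_to_use so that the
+ * (i + 1) % count at the top of each loop iteration lands on
+ * next_to_use first. */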
+ i = (tx_ring->next_to_use + tx_ring->count - 1) % tx_ring->count;
+ count = 0;
+
+ offset = 0;
+
+ while(len) {
+ i = (i + 1) % tx_ring->count;
+ size = min(len, adapter->max_data_per_txd);
+ tx_ring->buffer_info[i].length = size;
+ tx_ring->buffer_info[i].dma =
+ pci_map_single(adapter->pdev,
+ skb->data + offset,
+ size,
+ PCI_DMA_TODEVICE);
+ tx_ring->buffer_info[i].time_stamp = jiffies;
+
+ len -= size;
+ offset += size;
+ count++;
+ }
+
+ for(f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
+ struct skb_frag_struct *frag;
+
+ frag = &skb_shinfo(skb)->frags[f];
+ len = frag->size;
+ offset = 0;
+
+ while(len) {
+ i = (i + 1) % tx_ring->count;
+ size = min(len, adapter->max_data_per_txd);
+ tx_ring->buffer_info[i].length = size;
+ tx_ring->buffer_info[i].dma =
+ pci_map_page(adapter->pdev,
+ frag->page,
+ frag->page_offset + offset,
+ size,
+ PCI_DMA_TODEVICE);
+
+ len -= size;
+ offset += size;
+ count++;
+ }
+ }
+ tx_ring->buffer_info[i].skb = skb;
+
+ return count;
+}
+
+static inline void
+e1000_tx_queue(struct e1000_adapter *adapter, int count, int tx_flags)
+{
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ struct e1000_tx_desc *tx_desc = NULL;
+ uint32_t txd_upper, txd_lower;
+ int i;
+
+ txd_upper = 0;
+ txd_lower = adapter->txd_cmd;
+
+ if(tx_flags & E1000_TX_FLAGS_CSUM) {
+ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+ txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+ }
+
+ if(tx_flags & E1000_TX_FLAGS_VLAN) {
+ txd_lower |= E1000_TXD_CMD_VLE;
+ txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK);
+ }
+
+ i = tx_ring->next_to_use;
+
+ while(count--) {
+ tx_desc = E1000_TX_DESC(*tx_ring, i);
+ tx_desc->buffer_addr = cpu_to_le64(tx_ring->buffer_info[i].dma);
+ tx_desc->lower.data =
+ cpu_to_le32(txd_lower | tx_ring->buffer_info[i].length);
+ tx_desc->upper.data = cpu_to_le32(txd_upper);
+ i = (i + 1) % tx_ring->count;
+ }
+
+ tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP);
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+
+ tx_ring->next_to_use = i;
+ E1000_WRITE_REG(&adapter->hw, TDT, i);
+}
+
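+/* Ceiling division: how many descriptors are needed to carry S bytes
+ * at X bytes per descriptor, e.g. S = 3000, X = 2048 -> 2. */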
+#define TXD_USE_COUNT(S, X) (((S) / (X)) + (((S) % (X)) ? 1 : 0))
+
+static int
+e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ int tx_flags = 0, count;
+ int f;
+
+ count = TXD_USE_COUNT(skb->len - skb->data_len,
+ adapter->max_data_per_txd);
+
+ if(count == 0) {
+ dev_kfree_skb_any(skb);
+ return 0;
+ }
+
+ for(f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+ count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size,
+ adapter->max_data_per_txd);
+
+ if(skb->ip_summed == CHECKSUM_HW)
+ count++;
+
+ if(E1000_DESC_UNUSED(&adapter->tx_ring) < count) {
+ netif_stop_queue(netdev);
+ return 1;
+ }
+
+ if(e1000_tx_csum(adapter, skb))
+ tx_flags |= E1000_TX_FLAGS_CSUM;
+
+ if(adapter->vlgrp && vlan_tx_tag_present(skb)) {
+ tx_flags |= E1000_TX_FLAGS_VLAN;
+ tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT);
+ }
+
+ count = e1000_tx_map(adapter, skb);
+
+ e1000_tx_queue(adapter, count, tx_flags);
+
+ netdev->trans_start = jiffies;
+
+ return 0;
+}
+
+/**
+ * e1000_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ **/
+
+static void
+e1000_tx_timeout(struct net_device *netdev)
+{
+ //struct e1000_adapter *adapter = netdev->priv;
+
+ /* Do the reset outside of interrupt context */
+ //schedule_task(&adapter->tx_timeout_task); XXXX Not in Xen!!!
+ e1000_tx_timeout_task(netdev); // XXX HACK
+}
+
+static void
+e1000_tx_timeout_task(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+
+ netif_device_detach(netdev);
+ e1000_down(adapter);
+ e1000_up(adapter);
+ netif_device_attach(netdev);
+}
+
+/**
+ * e1000_get_stats - Get System Network Statistics
+ * @netdev: network interface device structure
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are actually updated from the timer callback.
+ **/
+
+static struct net_device_stats *
+e1000_get_stats(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+
+ return &adapter->net_stats;
+}
+
+/**
+ * e1000_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+
+static int
+e1000_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ int old_mtu = adapter->rx_buffer_len;
+ int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE;
+
+ if((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) ||
+ (max_frame > MAX_JUMBO_FRAME_SIZE)) {
+ E1000_ERR("Invalid MTU setting\n");
+ return -EINVAL;
+ }
+
+ if(max_frame <= MAXIMUM_ETHERNET_FRAME_SIZE) {
+ adapter->rx_buffer_len = E1000_RXBUFFER_2048;
+
+ } else if(adapter->hw.mac_type < e1000_82543) {
+ E1000_ERR("Jumbo Frames not supported on 82542\n");
+ return -EINVAL;
+
+ } else if(max_frame <= E1000_RXBUFFER_4096) {
+ adapter->rx_buffer_len = E1000_RXBUFFER_4096;
+
+ } else if(max_frame <= E1000_RXBUFFER_8192) {
+ adapter->rx_buffer_len = E1000_RXBUFFER_8192;
+
+ } else {
+ adapter->rx_buffer_len = E1000_RXBUFFER_16384;
+ }
+
+ if(old_mtu != adapter->rx_buffer_len && netif_running(netdev)) {
+
+ e1000_down(adapter);
+ e1000_up(adapter);
+ }
+
+ netdev->mtu = new_mtu;
+ adapter->hw.max_frame_size = max_frame;
+
+ return 0;
+}
+
+/**
+ * e1000_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_update_stats(struct e1000_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ unsigned long flags;
+ uint16_t phy_tmp;
+
+#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
+
+ spin_lock_irqsave(&adapter->stats_lock, flags);
+
+ /* these counters are modified from e1000_tbi_adjust_stats(),
+ * called from interrupt context, so they must only
+ * be written while holding adapter->stats_lock
+ */
+
+ adapter->stats.crcerrs += E1000_READ_REG(hw, CRCERRS);
+ adapter->stats.gprc += E1000_READ_REG(hw, GPRC);
+ adapter->stats.gorcl += E1000_READ_REG(hw, GORCL);
+ adapter->stats.gorch += E1000_READ_REG(hw, GORCH);
+ adapter->stats.bprc += E1000_READ_REG(hw, BPRC);
+ adapter->stats.mprc += E1000_READ_REG(hw, MPRC);
+ adapter->stats.roc += E1000_READ_REG(hw, ROC);
+ adapter->stats.prc64 += E1000_READ_REG(hw, PRC64);
+ adapter->stats.prc127 += E1000_READ_REG(hw, PRC127);
+ adapter->stats.prc255 += E1000_READ_REG(hw, PRC255);
+ adapter->stats.prc511 += E1000_READ_REG(hw, PRC511);
+ adapter->stats.prc1023 += E1000_READ_REG(hw, PRC1023);
+ adapter->stats.prc1522 += E1000_READ_REG(hw, PRC1522);
+
+ spin_unlock_irqrestore(&adapter->stats_lock, flags);
+
+ /* the rest of the counters are only modified here */
+
+ adapter->stats.symerrs += E1000_READ_REG(hw, SYMERRS);
+ adapter->stats.mpc += E1000_READ_REG(hw, MPC);
+ adapter->stats.scc += E1000_READ_REG(hw, SCC);
+ adapter->stats.ecol += E1000_READ_REG(hw, ECOL);
+ adapter->stats.mcc += E1000_READ_REG(hw, MCC);
+ adapter->stats.latecol += E1000_READ_REG(hw, LATECOL);
+ adapter->stats.dc += E1000_READ_REG(hw, DC);
+ adapter->stats.sec += E1000_READ_REG(hw, SEC);
+ adapter->stats.rlec += E1000_READ_REG(hw, RLEC);
+ adapter->stats.xonrxc += E1000_READ_REG(hw, XONRXC);
+ adapter->stats.xontxc += E1000_READ_REG(hw, XONTXC);
+ adapter->stats.xoffrxc += E1000_READ_REG(hw, XOFFRXC);
+ adapter->stats.xofftxc += E1000_READ_REG(hw, XOFFTXC);
+ adapter->stats.fcruc += E1000_READ_REG(hw, FCRUC);
+ adapter->stats.gptc += E1000_READ_REG(hw, GPTC);
+ adapter->stats.gotcl += E1000_READ_REG(hw, GOTCL);
+ adapter->stats.gotch += E1000_READ_REG(hw, GOTCH);
+ adapter->stats.rnbc += E1000_READ_REG(hw, RNBC);
+ adapter->stats.ruc += E1000_READ_REG(hw, RUC);
+ adapter->stats.rfc += E1000_READ_REG(hw, RFC);
+ adapter->stats.rjc += E1000_READ_REG(hw, RJC);
+ adapter->stats.torl += E1000_READ_REG(hw, TORL);
+ adapter->stats.torh += E1000_READ_REG(hw, TORH);
+ adapter->stats.totl += E1000_READ_REG(hw, TOTL);
+ adapter->stats.toth += E1000_READ_REG(hw, TOTH);
+ adapter->stats.tpr += E1000_READ_REG(hw, TPR);
+ adapter->stats.ptc64 += E1000_READ_REG(hw, PTC64);
+ adapter->stats.ptc127 += E1000_READ_REG(hw, PTC127);
+ adapter->stats.ptc255 += E1000_READ_REG(hw, PTC255);
+ adapter->stats.ptc511 += E1000_READ_REG(hw, PTC511);
+ adapter->stats.ptc1023 += E1000_READ_REG(hw, PTC1023);
+ adapter->stats.ptc1522 += E1000_READ_REG(hw, PTC1522);
+ adapter->stats.mptc += E1000_READ_REG(hw, MPTC);
+ adapter->stats.bptc += E1000_READ_REG(hw, BPTC);
+
+ /* used for adaptive IFS */
+
+ hw->tx_packet_delta = E1000_READ_REG(hw, TPT);
+ adapter->stats.tpt += hw->tx_packet_delta;
+ hw->collision_delta = E1000_READ_REG(hw, COLC);
+ adapter->stats.colc += hw->collision_delta;
+
+ if(hw->mac_type >= e1000_82543) {
+ adapter->stats.algnerrc += E1000_READ_REG(hw, ALGNERRC);
+ adapter->stats.rxerrc += E1000_READ_REG(hw, RXERRC);
+ adapter->stats.tncrs += E1000_READ_REG(hw, TNCRS);
+ adapter->stats.cexterr += E1000_READ_REG(hw, CEXTERR);
+ adapter->stats.tsctc += E1000_READ_REG(hw, TSCTC);
+ adapter->stats.tsctfc += E1000_READ_REG(hw, TSCTFC);
+ }
+
+ /* Fill out the OS statistics structure */
+
+ adapter->net_stats.rx_packets = adapter->stats.gprc;
+ adapter->net_stats.tx_packets = adapter->stats.gptc;
+ adapter->net_stats.rx_bytes = adapter->stats.gorcl;
+ adapter->net_stats.tx_bytes = adapter->stats.gotcl;
+ adapter->net_stats.multicast = adapter->stats.mprc;
+ adapter->net_stats.collisions = adapter->stats.colc;
+
+ /* Rx Errors */
+
+ adapter->net_stats.rx_errors = adapter->stats.rxerrc +
+ adapter->stats.crcerrs + adapter->stats.algnerrc +
+ adapter->stats.rlec + adapter->stats.rnbc +
+ adapter->stats.mpc + adapter->stats.cexterr;
+ adapter->net_stats.rx_dropped = adapter->stats.rnbc;
+ adapter->net_stats.rx_length_errors = adapter->stats.rlec;
+ adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs;
+ adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc;
+ adapter->net_stats.rx_fifo_errors = adapter->stats.mpc;
+ adapter->net_stats.rx_missed_errors = adapter->stats.mpc;
+
+ /* Tx Errors */
+
+ adapter->net_stats.tx_errors = adapter->stats.ecol +
+ adapter->stats.latecol;
+ adapter->net_stats.tx_aborted_errors = adapter->stats.ecol;
+ adapter->net_stats.tx_window_errors = adapter->stats.latecol;
+ adapter->net_stats.tx_carrier_errors = adapter->stats.tncrs;
+
+ /* Tx Dropped needs to be maintained elsewhere */
+
+ /* Phy Stats */
+
+ if(hw->media_type == e1000_media_type_copper) {
+ if((adapter->link_speed == SPEED_1000) &&
+ (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
+ phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
+ adapter->phy_stats.idle_errors += phy_tmp;
+ }
+
+ if((hw->mac_type <= e1000_82546) &&
+ !e1000_read_phy_reg(hw, M88E1000_RX_ERR_CNTR, &phy_tmp))
+ adapter->phy_stats.receive_errors += phy_tmp;
+ }
+}
+
+/**
+ * e1000_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+
+static inline void
+e1000_irq_disable(struct e1000_adapter *adapter)
+{
+ atomic_inc(&adapter->irq_sem);
+ E1000_WRITE_REG(&adapter->hw, IMC, ~0);
+ E1000_WRITE_FLUSH(&adapter->hw);
+ synchronize_irq();
+}
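+
+/* irq_sem makes these calls nest: each e1000_irq_disable bumps the
+ * counter and masks everything; only the e1000_irq_enable that drops
+ * the counter back to zero re-arms the interrupt mask. */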
+
+/**
+ * e1000_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+
+static inline void
+e1000_irq_enable(struct e1000_adapter *adapter)
+{
+ if(atomic_dec_and_test(&adapter->irq_sem)) {
+ E1000_WRITE_REG(&adapter->hw, IMS, IMS_ENABLE_MASK);
+ E1000_WRITE_FLUSH(&adapter->hw);
+ }
+}
+
+/**
+ * e1000_intr - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ * @pt_regs: CPU registers structure
+ **/
+
+static void
+e1000_intr(int irq, void *data, struct pt_regs *regs)
+{
+ struct net_device *netdev = data;
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t icr;
+ int i = E1000_MAX_INTR;
+
+ while(i && (icr = E1000_READ_REG(&adapter->hw, ICR))) {
+
+ if(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+ adapter->hw.get_link_status = 1;
+ mod_timer(&adapter->watchdog_timer, jiffies);
+ }
+
+ e1000_clean_rx_irq(adapter);
+ e1000_clean_tx_irq(adapter);
+ i--;
+
+ }
+}
+
+/**
+ * e1000_clean_tx_irq - Reclaim resources after transmit completes
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_tx_irq(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_tx_desc *tx_desc;
+ int i;
+
+ i = tx_ring->next_to_clean;
+ tx_desc = E1000_TX_DESC(*tx_ring, i);
+
+ while(tx_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) {
+
+ if(tx_ring->buffer_info[i].dma) {
+
+ pci_unmap_page(pdev,
+ tx_ring->buffer_info[i].dma,
+ tx_ring->buffer_info[i].length,
+ PCI_DMA_TODEVICE);
+
+ tx_ring->buffer_info[i].dma = 0;
+ }
+
+ if(tx_ring->buffer_info[i].skb) {
+
+ dev_kfree_skb_any(tx_ring->buffer_info[i].skb);
+
+ tx_ring->buffer_info[i].skb = NULL;
+ }
+
+ tx_desc->upper.data = 0;
+
+ i = (i + 1) % tx_ring->count;
+ tx_desc = E1000_TX_DESC(*tx_ring, i);
+ }
+
+ tx_ring->next_to_clean = i;
+
+ if(netif_queue_stopped(netdev) && netif_carrier_ok(netdev) &&
+ (E1000_DESC_UNUSED(tx_ring) > E1000_TX_QUEUE_WAKE)) {
+
+ netif_wake_queue(netdev);
+ }
+}
+
+/**
+ * e1000_clean_rx_irq - Send received data up the network stack
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_rx_irq(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_rx_desc *rx_desc;
+ struct sk_buff *skb;
+ unsigned long flags;
+ uint32_t length;
+ uint8_t last_byte;
+ int i;
+
+ i = rx_ring->next_to_clean;
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+
+ while(rx_desc->status & E1000_RXD_STAT_DD) {
+
+ pci_unmap_single(pdev,
+ rx_ring->buffer_info[i].dma,
+ rx_ring->buffer_info[i].length,
+ PCI_DMA_FROMDEVICE);
+
+ skb = rx_ring->buffer_info[i].skb;
+ length = le16_to_cpu(rx_desc->length);
+
+ if(!(rx_desc->status & E1000_RXD_STAT_EOP)) {
+
+ /* All receives must fit into a single buffer */
+
+ E1000_DBG("Receive packet consumed multiple buffers\n");
+
+ dev_kfree_skb_irq(skb);
+ rx_desc->status = 0;
+ rx_ring->buffer_info[i].skb = NULL;
+
+ i = (i + 1) % rx_ring->count;
+
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+ continue;
+ }
+
+ if(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
+
+ last_byte = *(skb->data + length - 1);
+
+ if(TBI_ACCEPT(&adapter->hw, rx_desc->status,
+ rx_desc->errors, length, last_byte)) {
+
+ spin_lock_irqsave(&adapter->stats_lock, flags);
+
+ e1000_tbi_adjust_stats(&adapter->hw,
+ &adapter->stats,
+ length, skb->data);
+
+ spin_unlock_irqrestore(&adapter->stats_lock,
+ flags);
+ length--;
+ } else {
+
+ dev_kfree_skb_irq(skb);
+ rx_desc->status = 0;
+ rx_ring->buffer_info[i].skb = NULL;
+
+ i = (i + 1) % rx_ring->count;
+
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+ continue;
+ }
+ }
+
+ /* Good Receive */
+ skb_put(skb, length - ETHERNET_FCS_SIZE);
+
+ /* Receive Checksum Offload */
+ e1000_rx_checksum(adapter, rx_desc, skb);
+
+ skb->protocol = eth_type_trans(skb, netdev);
+ if(adapter->vlgrp && (rx_desc->status & E1000_RXD_STAT_VP)) {
+ vlan_hwaccel_rx(skb, adapter->vlgrp,
+ (rx_desc->special & E1000_RXD_SPC_VLAN_MASK));
+ } else {
+ netif_rx(skb);
+ }
+ netdev->last_rx = jiffies;
+
+ rx_desc->status = 0;
+ rx_ring->buffer_info[i].skb = NULL;
+
+ i = (i + 1) % rx_ring->count;
+
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+ }
+
+ rx_ring->next_to_clean = i;
+
+ e1000_alloc_rx_buffers(adapter);
+}
+
+/**
+ * e1000_alloc_rx_buffers - Replace used receive buffers
+ * @data: address of board private structure
+ **/
+
+static void
+e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_rx_desc *rx_desc;
+ struct sk_buff *skb;
+ int reserve_len;
+ int i;
+
+ reserve_len = 2;
+
+ i = rx_ring->next_to_use;
+
+ while(!rx_ring->buffer_info[i].skb) {
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+
+ skb = dev_alloc_skb(adapter->rx_buffer_len + reserve_len);
+
+ if(!skb) {
+ /* Better luck next round */
+ break;
+ }
+
+ /* Make buffer alignment 2 beyond a 16 byte boundary
+ * this will result in a 16 byte aligned IP header after
+ * the 14 byte MAC header is removed
+ */
+ skb_reserve(skb, reserve_len);
+
+ skb->dev = netdev;
+
+ rx_ring->buffer_info[i].skb = skb;
+ rx_ring->buffer_info[i].length = adapter->rx_buffer_len;
+ rx_ring->buffer_info[i].dma =
+ pci_map_single(pdev,
+ skb->data,
+ adapter->rx_buffer_len,
+ PCI_DMA_FROMDEVICE);
+
+ rx_desc->buffer_addr = cpu_to_le64(rx_ring->buffer_info[i].dma);
+
+ if(!(i % E1000_RX_BUFFER_WRITE)) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+
+ E1000_WRITE_REG(&adapter->hw, RDT, i);
+ }
+
+ i = (i + 1) % rx_ring->count;
+ }
+
+ rx_ring->next_to_use = i;
+}
+
+/**
+ * e1000_ioctl - handle device-specific ioctl requests
+ * @netdev: network interface device structure
+ * @ifr: interface request structure
+ * @cmd: ioctl command
+ **/
+
+static int
+e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ switch (cmd) {
+ case SIOCETHTOOL:
+ return e1000_ethtool_ioctl(netdev, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * e1000_rx_checksum - Receive Checksum Offload for 82543
+ * @adapter: board private structure
+ * @rx_desc: receive descriptor
+ * @sk_buff: socket buffer with received data
+ **/
+
+static inline void
+e1000_rx_checksum(struct e1000_adapter *adapter,
+ struct e1000_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ /* 82543 or newer only */
+ if((adapter->hw.mac_type < e1000_82543) ||
+ /* Ignore Checksum bit is set */
+ (rx_desc->status & E1000_RXD_STAT_IXSM) ||
+ /* TCP Checksum has not been calculated */
+ (!(rx_desc->status & E1000_RXD_STAT_TCPCS))) {
+ skb->ip_summed = CHECKSUM_NONE;
+ return;
+ }
+
+ /* At this point we know the hardware did the TCP checksum */
+ /* now look at the TCP checksum error bit */
+ if(rx_desc->errors & E1000_RXD_ERR_TCPE) {
+ /* let the stack verify checksum errors */
+ skb->ip_summed = CHECKSUM_NONE;
+ adapter->hw_csum_err++;
+ } else {
+ /* TCP checksum is good */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ adapter->hw_csum_good++;
+ }
+}
+
+void
+e1000_pci_set_mwi(struct e1000_hw *hw)
+{
+ struct e1000_adapter *adapter = hw->back;
+
+ pci_set_mwi(adapter->pdev);
+}
+
+void
+e1000_pci_clear_mwi(struct e1000_hw *hw)
+{
+ struct e1000_adapter *adapter = hw->back;
+
+ pci_clear_mwi(adapter->pdev);
+}
+
+void
+e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
+{
+ struct e1000_adapter *adapter = hw->back;
+
+ pci_read_config_word(adapter->pdev, reg, value);
+}
+
+void
+e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
+{
+ struct e1000_adapter *adapter = hw->back;
+
+ pci_write_config_word(adapter->pdev, reg, *value);
+}
+
+uint32_t
+e1000_io_read(struct e1000_hw *hw, uint32_t port)
+{
+ return inl(port);
+}
+
+void
+e1000_io_write(struct e1000_hw *hw, uint32_t port, uint32_t value)
+{
+ outl(value, port);
+}
+
+static void
+e1000_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t ctrl, rctl;
+
+ e1000_irq_disable(adapter);
+ adapter->vlgrp = grp;
+
+ if(grp) {
+ /* enable VLAN tag insert/strip */
+
+ E1000_WRITE_REG(&adapter->hw, VET, ETHERNET_IEEE_VLAN_TYPE);
+
+ ctrl = E1000_READ_REG(&adapter->hw, CTRL);
+ ctrl |= E1000_CTRL_VME;
+ E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
+
+ /* enable VLAN receive filtering */
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl |= E1000_RCTL_VFE;
+ rctl &= ~E1000_RCTL_CFIEN;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ } else {
+ /* disable VLAN tag insert/strip */
+
+ ctrl = E1000_READ_REG(&adapter->hw, CTRL);
+ ctrl &= ~E1000_CTRL_VME;
+ E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
+
+ /* disable VLAN filtering */
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl &= ~E1000_RCTL_VFE;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ }
+
+ e1000_irq_enable(adapter);
+}
+
+static void
+e1000_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t vfta, index;
+
+ /* add VID to filter table */
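+ /* e.g. VID 100: index = 100 >> 5 = 3, bit = 100 & 0x1F = 4,
+ * so bit 4 of VFTA[3] gets set */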
+
+ index = (vid >> 5) & 0x7F;
+ vfta = E1000_READ_REG_ARRAY(&adapter->hw, VFTA, index);
+ vfta |= (1 << (vid & 0x1F));
+ e1000_write_vfta(&adapter->hw, index, vfta);
+}
+
+static void
+e1000_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t vfta, index;
+
+ e1000_irq_disable(adapter);
+
+ if(adapter->vlgrp)
+ adapter->vlgrp->vlan_devices[vid] = NULL;
+
+ e1000_irq_enable(adapter);
+
+ /* remove VID from filter table */
+
+ index = (vid >> 5) & 0x7F;
+ vfta = E1000_READ_REG_ARRAY(&adapter->hw, VFTA, index);
+ vfta &= ~(1 << (vid & 0x1F));
+ e1000_write_vfta(&adapter->hw, index, vfta);
+}
+
+static void
+e1000_restore_vlan(struct e1000_adapter *adapter)
+{
+ e1000_vlan_rx_register(adapter->netdev, adapter->vlgrp);
+
+ if(adapter->vlgrp) {
+ uint16_t vid;
+ for(vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ if(!adapter->vlgrp->vlan_devices[vid])
+ continue;
+ e1000_vlan_rx_add_vid(adapter->netdev, vid);
+ }
+ }
+}
+
+static int
+e1000_notify_reboot(struct notifier_block *nb, unsigned long event, void *p)
+{
+ struct pci_dev *pdev = NULL;
+
+ switch(event) {
+ case SYS_DOWN:
+ case SYS_HALT:
+ case SYS_POWER_OFF:
+ pci_for_each_dev(pdev) {
+ if(pci_dev_driver(pdev) == &e1000_driver)
+ e1000_suspend(pdev, 3);
+ }
+ }
+ return NOTIFY_DONE;
+}
+
+static int
+e1000_suspend(struct pci_dev *pdev, uint32_t state)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t ctrl, ctrl_ext, rctl, manc, status;
+ uint32_t wufc = adapter->wol;
+
+ netif_device_detach(netdev);
+
+ if(netif_running(netdev))
+ e1000_down(adapter);
+
+ status = E1000_READ_REG(&adapter->hw, STATUS);
+ if(status & E1000_STATUS_LU)
+ wufc &= ~E1000_WUFC_LNKC;
+
+ if(wufc) {
+ e1000_setup_rctl(adapter);
+ e1000_set_multi(netdev);
+
+ /* turn on all-multi mode if wake on multicast is enabled */
+ if(adapter->wol & E1000_WUFC_MC) {
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl |= E1000_RCTL_MPE;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ }
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ ctrl = E1000_READ_REG(&adapter->hw, CTRL);
+ /* advertise wake from D3Cold */
+ #define E1000_CTRL_ADVD3WUC 0x00100000
+ /* phy power management enable */
+ #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
+ ctrl |= E1000_CTRL_ADVD3WUC |
+ E1000_CTRL_EN_PHY_PWR_MGMT;
+ E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
+ }
+
+ if(adapter->hw.media_type == e1000_media_type_fiber) {
+ /* keep the laser running in D3 */
+ ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_SDP7_DATA;
+ E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ctrl_ext);
+ }
+
+ E1000_WRITE_REG(&adapter->hw, WUC, E1000_WUC_PME_EN);
+ E1000_WRITE_REG(&adapter->hw, WUFC, wufc);
+ pci_enable_wake(pdev, 3, 1);
+ pci_enable_wake(pdev, 4, 1); /* 4 == D3 cold */
+ } else {
+ E1000_WRITE_REG(&adapter->hw, WUC, 0);
+ E1000_WRITE_REG(&adapter->hw, WUFC, 0);
+ pci_enable_wake(pdev, 3, 0);
+ pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */
+ }
+
+ pci_save_state(pdev, adapter->pci_state);
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ manc = E1000_READ_REG(&adapter->hw, MANC);
+ if(manc & E1000_MANC_SMBUS_EN) {
+ manc |= E1000_MANC_ARP_EN;
+ E1000_WRITE_REG(&adapter->hw, MANC, manc);
+ state = 0;
+ }
+ }
+
+ state = (state > 0) ? 3 : 0;
+ pci_set_power_state(pdev, state);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int
+e1000_resume(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t manc;
+
+ pci_set_power_state(pdev, 0);
+ pci_restore_state(pdev, adapter->pci_state);
+
+ pci_enable_wake(pdev, 3, 0);
+ pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */
+
+ e1000_reset(adapter);
+ E1000_WRITE_REG(&adapter->hw, WUS, ~0);
+
+ if(netif_running(netdev))
+ e1000_up(adapter);
+
+ netif_device_attach(netdev);
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ manc = E1000_READ_REG(&adapter->hw, MANC);
+ manc &= ~(E1000_MANC_ARP_EN);
+ E1000_WRITE_REG(&adapter->hw, MANC, manc);
+ }
+
+ return 0;
+}
+#endif
+
+/* e1000_main.c */
diff --git a/xen/drivers/net/e1000/e1000_osdep.h b/xen/drivers/net/e1000/e1000_osdep.h
new file mode 100644
index 0000000000..40b62bfecd
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_osdep.h
@@ -0,0 +1,112 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+/* glue for the OS-independent part of e1000;
+ * includes the register access macros
+ */
+
+#ifndef _E1000_OSDEP_H_
+#define _E1000_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+
+#ifndef msec_delay
+#define msec_delay(x) { \
+ unsigned long s = jiffies + 1 + (((x) * HZ) / 1000); \
+ while (time_before(jiffies, s)); }
+
+#if 0
+/******************** NOT in XEN ! *******/
+#define XXXXmsec_delay(x) do { if(in_interrupt()) { \
+ /* Don't mdelay in interrupt context! */ \
+ BUG(); \
+ } else { \
+ set_current_state(TASK_UNINTERRUPTIBLE); \
+ schedule_timeout((x * HZ)/1000); \
+ } } while(0)
+#endif
+
+#else
+#error "msec already defined!"
+#endif
+
+#define PCI_COMMAND_REGISTER PCI_COMMAND
+#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE
+
+typedef enum {
+ FALSE = 0,
+ TRUE = 1
+} boolean_t;
+
+#define MSGOUT(S, A, B) printk(KERN_DEBUG S "\n", A, B)
+
+//#define DBG 1
+
+#if DBG
+#define DEBUGOUT(S) printk(KERN_DEBUG S "\n")
+#define DEBUGOUT1(S, A...) printk(KERN_DEBUG S "\n", A)
+#else
+#define DEBUGOUT(S)
+#define DEBUGOUT1(S, A...)
+#endif
+
+#define DEBUGFUNC(F) DEBUGOUT(F)
+#define DEBUGOUT2 DEBUGOUT1
+#define DEBUGOUT3 DEBUGOUT2
+#define DEBUGOUT7 DEBUGOUT3
+
+
+#define E1000_WRITE_REG(a, reg, value) ( \
+ ((a)->mac_type >= e1000_82543) ? \
+ (writel((value), ((a)->hw_addr + E1000_##reg))) : \
+ (writel((value), ((a)->hw_addr + E1000_82542_##reg))))
+
+#define E1000_READ_REG(a, reg) ( \
+ ((a)->mac_type >= e1000_82543) ? \
+ readl((a)->hw_addr + E1000_##reg) : \
+ readl((a)->hw_addr + E1000_82542_##reg))
+
+#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \
+ ((a)->mac_type >= e1000_82543) ? \
+ writel((value), ((a)->hw_addr + E1000_##reg + ((offset) << 2))) : \
+ writel((value), ((a)->hw_addr + E1000_82542_##reg + ((offset) << 2))))
+
+#define E1000_READ_REG_ARRAY(a, reg, offset) ( \
+ ((a)->mac_type >= e1000_82543) ? \
+ readl((a)->hw_addr + E1000_##reg + ((offset) << 2)) : \
+ readl((a)->hw_addr + E1000_82542_##reg + ((offset) << 2)))
+
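+/* Reading STATUS back forces any posted PCI writes out to the device
+ * before execution continues (the usual posted-write flush idiom). */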
+#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, STATUS)
+
+#endif /* _E1000_OSDEP_H_ */
diff --git a/xen/drivers/net/e1000/e1000_param.c b/xen/drivers/net/e1000/e1000_param.c
new file mode 100644
index 0000000000..a11941f3f2
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_param.c
@@ -0,0 +1,655 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000.h"
+
+/* This is the only thing that needs to be changed to adjust the
+ * maximum number of ports that the driver can manage.
+ */
+
+#define E1000_MAX_NIC 32
+
+#define OPTION_UNSET -1
+#define OPTION_DISABLED 0
+#define OPTION_ENABLED 1
+
+/* Module Parameters are always initialized to -1, so that the driver
+ * can tell the difference between no user-specified value and the
+ * user asking for the default value.
+ * The true default values are loaded in when e1000_check_options is called.
+ *
+ * This is a GCC extension to ANSI C.
+ * See the item "Labeled Elements in Initializers" in the section
+ * "Extensions to the C Language Family" of the GCC documentation.
+ */
+
+#define E1000_PARAM_INIT { [0 ... E1000_MAX_NIC] = OPTION_UNSET }
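+/* With E1000_MAX_NIC = 32 this expands to { [0 ... 32] = OPTION_UNSET },
+ * i.e. all 33 per-board slots start out as -1. */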
+
+/* All parameters are treated the same, as an integer array of values.
+ * This macro just reduces the need to repeat the same declaration code
+ * over and over (plus this helps to avoid typo bugs).
+ */
+
+#define E1000_PARAM(X, S) \
+static const int __devinitdata X[E1000_MAX_NIC + 1] = E1000_PARAM_INIT; \
+MODULE_PARM(X, "1-" __MODULE_STRING(E1000_MAX_NIC) "i"); \
+MODULE_PARM_DESC(X, S);
+
+/* Transmit Descriptor Count
+ *
+ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers
+ * Valid Range: 80-4096 for 82544
+ *
+ * Default Value: 256
+ */
+
+E1000_PARAM(TxDescriptors, "Number of transmit descriptors");
+
+/* Receive Descriptor Count
+ *
+ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers
+ * Valid Range: 80-4096 for 82544
+ *
+ * Default Value: 80
+ */
+
+E1000_PARAM(RxDescriptors, "Number of receive descriptors");
+
+/* User Specified Speed Override
+ *
+ * Valid Range: 0, 10, 100, 1000
+ * - 0 - auto-negotiate at all supported speeds
+ * - 10 - only link at 10 Mbps
+ * - 100 - only link at 100 Mbps
+ * - 1000 - only link at 1000 Mbps
+ *
+ * Default Value: 0
+ */
+
+E1000_PARAM(Speed, "Speed setting");
+
+/* User Specified Duplex Override
+ *
+ * Valid Range: 0-2
+ * - 0 - auto-negotiate for duplex
+ * - 1 - only link at half duplex
+ * - 2 - only link at full duplex
+ *
+ * Default Value: 0
+ */
+
+E1000_PARAM(Duplex, "Duplex setting");
+
+/* Auto-negotiation Advertisement Override
+ *
+ * Valid Range: 0x01-0x0F, 0x20-0x2F
+ *
+ * The AutoNeg value is a bit mask describing which speed and duplex
+ * combinations should be advertised during auto-negotiation.
+ * The supported speed and duplex modes are listed below
+ *
+ * Bit            7     6     5     4     3     2     1     0
+ * Speed (Mbps)  N/A   N/A   1000  N/A   100   100   10    10
+ * Duplex                    Full        Full  Half  Full  Half
+ *
+ * Default Value: 0x2F
+ */
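+/* e.g. the default of 0x2F sets bits 0-3 and 5, advertising 10/HD,
+ * 10/FD, 100/HD, 100/FD and 1000/FD. */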
+
+E1000_PARAM(AutoNeg, "Advertised auto-negotiation setting");
+
+/* User Specified Flow Control Override
+ *
+ * Valid Range: 0-3
+ * - 0 - No Flow Control
+ * - 1 - Rx only, respond to PAUSE frames but do not generate them
+ * - 2 - Tx only, generate PAUSE frames but ignore them on receive
+ * - 3 - Full Flow Control Support
+ *
+ * Default Value: Read flow control settings from the EEPROM
+ */
+
+E1000_PARAM(FlowControl, "Flow Control setting");
+
+/* XsumRX - Receive Checksum Offload Enable/Disable
+ *
+ * Valid Range: 0, 1
+ * - 0 - disables all checksum offload
+ * - 1 - enables receive IP/TCP/UDP checksum offload
+ * on 82543 based NICs
+ *
+ * Default Value: 1
+ */
+
+E1000_PARAM(XsumRX, "Disable or enable Receive Checksum offload");
+
+/* Transmit Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ *
+ * Default Value: 64
+ */
+
+E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay");
+
+/* Transmit Absolute Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ *
+ * Default Value: 0
+ */
+
+E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay");
+
+/* Receive Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ *
+ * Default Value: 0/128
+ */
+
+E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
+
+/* Receive Absolute Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ *
+ * Default Value: 128
+ */
+
+E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
+
+#define AUTONEG_ADV_DEFAULT 0x2F
+#define AUTONEG_ADV_MASK 0x2F
+#define FLOW_CONTROL_DEFAULT FLOW_CONTROL_FULL
+
+#define DEFAULT_TXD 256
+#define MAX_TXD 256
+#define MIN_TXD 80
+#define MAX_82544_TXD 4096
+
+#define DEFAULT_RXD 80
+#define MAX_RXD 256
+#define MIN_RXD 80
+#define MAX_82544_RXD 4096
+
+#define DEFAULT_RDTR 0
+#define MAX_RXDELAY 0xFFFF
+#define MIN_RXDELAY 0
+
+#define DEFAULT_RADV 128
+#define MAX_RXABSDELAY 0xFFFF
+#define MIN_RXABSDELAY 0
+
+#define DEFAULT_TIDV 64
+#define MAX_TXDELAY 0xFFFF
+#define MIN_TXDELAY 0
+
+#define DEFAULT_TADV 64
+#define MAX_TXABSDELAY 0xFFFF
+#define MIN_TXABSDELAY 0
+
+struct e1000_option {
+ enum { enable_option, range_option, list_option } type;
+ char *name;
+ char *err;
+ int def;
+ union {
+ struct { /* range_option info */
+ int min;
+ int max;
+ } r;
+ struct { /* list_option info */
+ int nr;
+ struct e1000_opt_list { int i; char *str; } *p;
+ } l;
+ } arg;
+};
+
+static int __devinit
+e1000_validate_option(int *value, struct e1000_option *opt)
+{
+ if(*value == OPTION_UNSET) {
+ *value = opt->def;
+ return 0;
+ }
+
+ switch (opt->type) {
+ case enable_option:
+ switch (*value) {
+ case OPTION_ENABLED:
+ printk(KERN_INFO "%s Enabled\n", opt->name);
+ return 0;
+ case OPTION_DISABLED:
+ printk(KERN_INFO "%s Disabled\n", opt->name);
+ return 0;
+ }
+ break;
+ case range_option:
+ if(*value >= opt->arg.r.min && *value <= opt->arg.r.max) {
+ printk(KERN_INFO "%s set to %i\n", opt->name, *value);
+ return 0;
+ }
+ break;
+ case list_option: {
+ int i;
+ struct e1000_opt_list *ent;
+
+ for(i = 0; i < opt->arg.l.nr; i++) {
+ ent = &opt->arg.l.p[i];
+ if(*value == ent->i) {
+ if(ent->str[0] != '\0')
+ printk(KERN_INFO "%s\n", ent->str);
+ return 0;
+ }
+ }
+ }
+ break;
+ default:
+ BUG();
+ }
+
+ printk(KERN_INFO "Invalid %s specified (%i) %s\n",
+ opt->name, *value, opt->err);
+ *value = opt->def;
+ return -1;
+}
+
+static void e1000_check_fiber_options(struct e1000_adapter *adapter);
+static void e1000_check_copper_options(struct e1000_adapter *adapter);
+
+/**
+ * e1000_check_options - Range Checking for Command Line Parameters
+ * @adapter: board private structure
+ *
+ * This routine checks all command-line parameters for valid user
+ * input. If an invalid value is given, or if no user-specified
+ * value exists, a default value is used. The final value is stored
+ * in a variable in the adapter structure.
+ **/
+
+void __devinit
+e1000_check_options(struct e1000_adapter *adapter)
+{
+ int bd = adapter->bd_number;
+ if(bd >= E1000_MAX_NIC) {
+ printk(KERN_NOTICE
+ "Warning: no configuration for board #%i\n", bd);
+ printk(KERN_NOTICE "Using defaults for all values\n");
+ bd = E1000_MAX_NIC;
+ }
+
+ { /* Transmit Descriptor Count */
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Transmit Descriptors",
+ .err = "using default of " __MODULE_STRING(DEFAULT_TXD),
+ .def = DEFAULT_TXD,
+ .arg = { .r = { .min = MIN_TXD }}
+ };
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ e1000_mac_type mac_type = adapter->hw.mac_type;
+ opt.arg.r.max = mac_type < e1000_82544 ?
+ MAX_TXD : MAX_82544_TXD;
+
+ tx_ring->count = TxDescriptors[bd];
+ e1000_validate_option(&tx_ring->count, &opt);
+ E1000_ROUNDUP(tx_ring->count, REQ_TX_DESCRIPTOR_MULTIPLE);
+ }
+ { /* Receive Descriptor Count */
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Receive Descriptors",
+ .err = "using default of " __MODULE_STRING(DEFAULT_RXD),
+ .def = DEFAULT_RXD,
+ .arg = { .r = { .min = MIN_RXD }}
+ };
+ struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+ e1000_mac_type mac_type = adapter->hw.mac_type;
+ opt.arg.r.max = mac_type < e1000_82544 ? MAX_RXD : MAX_82544_RXD;
+
+ rx_ring->count = RxDescriptors[bd];
+ e1000_validate_option(&rx_ring->count, &opt);
+ E1000_ROUNDUP(rx_ring->count, REQ_RX_DESCRIPTOR_MULTIPLE);
+ }
+ { /* Checksum Offload Enable/Disable */
+ struct e1000_option opt = {
+ .type = enable_option,
+ .name = "Checksum Offload",
+ .err = "defaulting to Enabled",
+ .def = OPTION_ENABLED
+ };
+
+ int rx_csum = XsumRX[bd];
+ e1000_validate_option(&rx_csum, &opt);
+ adapter->rx_csum = rx_csum;
+ }
+ { /* Flow Control */
+
+ struct e1000_opt_list fc_list[] =
+ {{ e1000_fc_none, "Flow Control Disabled" },
+ { e1000_fc_rx_pause, "Flow Control Receive Only" },
+ { e1000_fc_tx_pause, "Flow Control Transmit Only" },
+ { e1000_fc_full, "Flow Control Enabled" },
+ { e1000_fc_default, "Flow Control Hardware Default" }};
+
+ struct e1000_option opt = {
+ .type = list_option,
+ .name = "Flow Control",
+ .err = "reading default settings from EEPROM",
+ .def = e1000_fc_default,
+ .arg = { .l = { .nr = ARRAY_SIZE(fc_list), .p = fc_list }}
+ };
+
+ int fc = FlowControl[bd];
+ e1000_validate_option(&fc, &opt);
+ adapter->hw.fc = adapter->hw.original_fc = fc;
+ }
+ { /* Transmit Interrupt Delay */
+ char *tidv = "using default of " __MODULE_STRING(DEFAULT_TIDV);
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Transmit Interrupt Delay",
+ .arg = { .r = { .min = MIN_TXDELAY, .max = MAX_TXDELAY }}
+ };
+ opt.def = DEFAULT_TIDV;
+ opt.err = tidv;
+
+ adapter->tx_int_delay = TxIntDelay[bd];
+ e1000_validate_option(&adapter->tx_int_delay, &opt);
+ }
+ { /* Transmit Absolute Interrupt Delay */
+ char *tadv = "using default of " __MODULE_STRING(DEFAULT_TADV);
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Transmit Absolute Interrupt Delay",
+ .arg = { .r = { .min = MIN_TXABSDELAY, .max = MAX_TXABSDELAY }}
+ };
+ opt.def = DEFAULT_TADV;
+ opt.err = tadv;
+
+ adapter->tx_abs_int_delay = TxAbsIntDelay[bd];
+ e1000_validate_option(&adapter->tx_abs_int_delay, &opt);
+ }
+ { /* Receive Interrupt Delay */
+ char *rdtr = "using default of " __MODULE_STRING(DEFAULT_RDTR);
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Receive Interrupt Delay",
+ .arg = { .r = { .min = MIN_RXDELAY, .max = MAX_RXDELAY }}
+ };
+ opt.def = DEFAULT_RDTR;
+ opt.err = rdtr;
+
+ adapter->rx_int_delay = RxIntDelay[bd];
+ e1000_validate_option(&adapter->rx_int_delay, &opt);
+ }
+ { /* Receive Absolute Interrupt Delay */
+ char *radv = "using default of " __MODULE_STRING(DEFAULT_RADV);
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Receive Absolute Interrupt Delay",
+ .arg = { .r = { .min = MIN_RXABSDELAY, .max = MAX_RXABSDELAY }}
+ };
+ opt.def = DEFAULT_RADV;
+ opt.err = radv;
+
+ adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
+ e1000_validate_option(&adapter->rx_abs_int_delay, &opt);
+ }
+
+ switch(adapter->hw.media_type) {
+ case e1000_media_type_fiber:
+ e1000_check_fiber_options(adapter);
+ break;
+ case e1000_media_type_copper:
+ e1000_check_copper_options(adapter);
+ break;
+ default:
+ BUG();
+ }
+}
+
+/**
+ * e1000_check_fiber_options - Range Checking for Link Options, Fiber Version
+ * @adapter: board private structure
+ *
+ * Handles speed and duplex options on fiber adapters
+ **/
+
+static void __devinit
+e1000_check_fiber_options(struct e1000_adapter *adapter)
+{
+ int bd = adapter->bd_number;
+ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd;
+
+ if((Speed[bd] != OPTION_UNSET)) {
+ printk(KERN_INFO "Speed not valid for fiber adapters, "
+ "parameter ignored\n");
+ }
+ if((Duplex[bd] != OPTION_UNSET)) {
+ printk(KERN_INFO "Duplex not valid for fiber adapters, "
+ "parameter ignored\n");
+ }
+ if((AutoNeg[bd] != OPTION_UNSET)) {
+ printk(KERN_INFO "AutoNeg not valid for fiber adapters, "
+ "parameter ignored\n");
+ }
+}
+
+/**
+ * e1000_check_copper_options - Range Checking for Link Options, Copper Version
+ * @adapter: board private structure
+ *
+ * Handles speed and duplex options on copper adapters
+ **/
+
+static void __devinit
+e1000_check_copper_options(struct e1000_adapter *adapter)
+{
+ int speed, dplx;
+ int bd = adapter->bd_number;
+ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd;
+
+ { /* Speed */
+ struct e1000_opt_list speed_list[] = {{ 0, "" },
+ { SPEED_10, "" },
+ { SPEED_100, "" },
+ { SPEED_1000, "" }};
+
+ struct e1000_option opt = {
+ .type = list_option,
+ .name = "Speed",
+ .err = "parameter ignored",
+ .def = 0,
+ .arg = { .l = { .nr = ARRAY_SIZE(speed_list), .p = speed_list }}
+ };
+
+ speed = Speed[bd];
+ e1000_validate_option(&speed, &opt);
+ }
+ { /* Duplex */
+ struct e1000_opt_list dplx_list[] = {{ 0, "" },
+ { HALF_DUPLEX, "" },
+ { FULL_DUPLEX, "" }};
+
+ struct e1000_option opt = {
+ .type = list_option,
+ .name = "Duplex",
+ .err = "parameter ignored",
+ .def = 0,
+ .arg = { .l = { .nr = ARRAY_SIZE(dplx_list), .p = dplx_list }}
+ };
+
+ dplx = Duplex[bd];
+ e1000_validate_option(&dplx, &opt);
+ }
+
+ if(AutoNeg[bd] != OPTION_UNSET && (speed != 0 || dplx != 0)) {
+ printk(KERN_INFO
+ "AutoNeg specified along with Speed or Duplex, "
+ "parameter ignored\n");
+ adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
+ } else { /* Autoneg */
+ struct e1000_opt_list an_list[] =
+ #define AA "AutoNeg advertising "
+ {{ 0x01, AA "10/HD" },
+ { 0x02, AA "10/FD" },
+ { 0x03, AA "10/FD, 10/HD" },
+ { 0x04, AA "100/HD" },
+ { 0x05, AA "100/HD, 10/HD" },
+ { 0x06, AA "100/HD, 10/FD" },
+ { 0x07, AA "100/HD, 10/FD, 10/HD" },
+ { 0x08, AA "100/FD" },
+ { 0x09, AA "100/FD, 10/HD" },
+ { 0x0a, AA "100/FD, 10/FD" },
+ { 0x0b, AA "100/FD, 10/FD, 10/HD" },
+ { 0x0c, AA "100/FD, 100/HD" },
+ { 0x0d, AA "100/FD, 100/HD, 10/HD" },
+ { 0x0e, AA "100/FD, 100/HD, 10/FD" },
+ { 0x0f, AA "100/FD, 100/HD, 10/FD, 10/HD" },
+ { 0x20, AA "1000/FD" },
+ { 0x21, AA "1000/FD, 10/HD" },
+ { 0x22, AA "1000/FD, 10/FD" },
+ { 0x23, AA "1000/FD, 10/FD, 10/HD" },
+ { 0x24, AA "1000/FD, 100/HD" },
+ { 0x25, AA "1000/FD, 100/HD, 10/HD" },
+ { 0x26, AA "1000/FD, 100/HD, 10/FD" },
+ { 0x27, AA "1000/FD, 100/HD, 10/FD, 10/HD" },
+ { 0x28, AA "1000/FD, 100/FD" },
+ { 0x29, AA "1000/FD, 100/FD, 10/HD" },
+ { 0x2a, AA "1000/FD, 100/FD, 10/FD" },
+ { 0x2b, AA "1000/FD, 100/FD, 10/FD, 10/HD" },
+ { 0x2c, AA "1000/FD, 100/FD, 100/HD" },
+ { 0x2d, AA "1000/FD, 100/FD, 100/HD, 10/HD" },
+ { 0x2e, AA "1000/FD, 100/FD, 100/HD, 10/FD" },
+ { 0x2f, AA "1000/FD, 100/FD, 100/HD, 10/FD, 10/HD" }};
+
+ struct e1000_option opt = {
+ .type = list_option,
+ .name = "AutoNeg",
+ .err = "parameter ignored",
+ .def = AUTONEG_ADV_DEFAULT,
+ .arg = { .l = { .nr = ARRAY_SIZE(an_list), .p = an_list }}
+ };
+
+ int an = AutoNeg[bd];
+ e1000_validate_option(&an, &opt);
+ adapter->hw.autoneg_advertised = an;
+ }
+
+ switch (speed + dplx) {
+ case 0:
+ adapter->hw.autoneg = 1;
+ if(Speed[bd] != OPTION_UNSET || Duplex[bd] != OPTION_UNSET)
+ printk(KERN_INFO
+ "Speed and duplex autonegotiation enabled\n");
+ break;
+ case HALF_DUPLEX:
+ printk(KERN_INFO "Half Duplex specified without Speed\n");
+ printk(KERN_INFO "Using Autonegotiation at Half Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_10_HALF |
+ ADVERTISE_100_HALF;
+ break;
+ case FULL_DUPLEX:
+ printk(KERN_INFO "Full Duplex specified without Speed\n");
+ printk(KERN_INFO "Using Autonegotiation at Full Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_10_FULL |
+ ADVERTISE_100_FULL |
+ ADVERTISE_1000_FULL;
+ break;
+ case SPEED_10:
+ printk(KERN_INFO "10 Mbps Speed specified without Duplex\n");
+ printk(KERN_INFO "Using Autonegotiation at 10 Mbps only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_10_HALF |
+ ADVERTISE_10_FULL;
+ break;
+ case SPEED_10 + HALF_DUPLEX:
+ printk(KERN_INFO "Forcing to 10 Mbps Half Duplex\n");
+ adapter->hw.autoneg = 0;
+ adapter->hw.forced_speed_duplex = e1000_10_half;
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_10 + FULL_DUPLEX:
+ printk(KERN_INFO "Forcing to 10 Mbps Full Duplex\n");
+ adapter->hw.autoneg = 0;
+ adapter->hw.forced_speed_duplex = e1000_10_full;
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_100:
+ printk(KERN_INFO "100 Mbps Speed specified without Duplex\n");
+ printk(KERN_INFO "Using Autonegotiation at 100 Mbps only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_100_HALF |
+ ADVERTISE_100_FULL;
+ break;
+ case SPEED_100 + HALF_DUPLEX:
+ printk(KERN_INFO "Forcing to 100 Mbps Half Duplex\n");
+ adapter->hw.autoneg = 0;
+ adapter->hw.forced_speed_duplex = e1000_100_half;
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_100 + FULL_DUPLEX:
+ printk(KERN_INFO "Forcing to 100 Mbps Full Duplex\n");
+ adapter->hw.autoneg = 0;
+ adapter->hw.forced_speed_duplex = e1000_100_full;
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_1000:
+ printk(KERN_INFO "1000 Mbps Speed specified without Duplex\n");
+ printk(KERN_INFO
+ "Using Autonegotiation at 1000 Mbps Full Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+ case SPEED_1000 + HALF_DUPLEX:
+ printk(KERN_INFO "Half Duplex is not supported at 1000 Mbps\n");
+ printk(KERN_INFO
+ "Using Autonegotiation at 1000 Mbps Full Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+ case SPEED_1000 + FULL_DUPLEX:
+ printk(KERN_INFO
+ "Using Autonegotiation at 1000 Mbps Full Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+ default:
+ BUG();
+ }
+
+ /* Speed, AutoNeg and MDI/MDI-X must all play nice */
+ if (e1000_validate_mdi_setting(&(adapter->hw)) < 0) {
+ printk(KERN_INFO "Speed, AutoNeg and MDI-X specifications are "
+ "incompatible. Setting MDI-X to a compatible value.\n");
+ }
+}
+
diff --git a/xen/drivers/net/ne/8390.c b/xen/drivers/net/ne/8390.c
new file mode 100644
index 0000000000..aa299a3470
--- /dev/null
+++ b/xen/drivers/net/ne/8390.c
@@ -0,0 +1,1158 @@
+/* 8390.c: A general NS8390 ethernet driver core for linux. */
+/*
+ Written 1992-94 by Donald Becker.
+
+ Copyright 1993 United States Government as represented by the
+ Director, National Security Agency.
+
+ This software may be used and distributed according to the terms
+ of the GNU General Public License, incorporated herein by reference.
+
+ The author may be reached as becker@scyld.com, or C/O
+ Scyld Computing Corporation
+ 410 Severn Ave., Suite 210
+ Annapolis MD 21403
+
+
+ This is the chip-specific code for many 8390-based ethernet adaptors.
+ This is not a complete driver, it must be combined with board-specific
+ code such as ne.c, wd.c, 3c503.c, etc.
+
+ Seeing how at least eight drivers use this code (not counting the
+ PCMCIA ones), it is easy to break some card by what seems like
+ a simple innocent change. Please contact me or Donald if you think
+ you have found something that needs changing. -- PG
+
+
+ Changelog:
+
+ Paul Gortmaker : remove set_bit lock, other cleanups.
+ Paul Gortmaker : add ei_get_8390_hdr() so we can pass skb's to
+ ei_block_input() for eth_io_copy_and_sum().
+ Paul Gortmaker : exchange static int ei_pingpong for a #define,
+ also add better Tx error handling.
+ Paul Gortmaker : rewrite Rx overrun handling as per NS specs.
+ Alexey Kuznetsov : use the 8390's six bit hash multicast filter.
+ Paul Gortmaker : tweak ANK's above multicast changes a bit.
+ Paul Gortmaker : update packet statistics for v2.1.x
+ Alan Cox : support arbitrary stupid port mappings on the
+ 68K Macintosh. Support >16bit I/O spaces
+ Paul Gortmaker : add kmod support for auto-loading of the 8390
+ module by all drivers that require it.
+ Alan Cox : Spinlocking work, added 'BUG_83C690'
+ Paul Gortmaker : Separate out Tx timeout code from Tx path.
+
+ Sources:
+ The National Semiconductor LAN Databook, and the 3Com 3c503 databook.
+
+ */
+
+static const char version[] =
+ "8390.c:v1.10cvs 9/23/94 Donald Becker (becker@cesdis.gsfc.nasa.gov)\n";
+
+#include <xeno/module.h>
+#include <xeno/kernel.h>
+#include <xeno/sched.h>
+//#include <xeno/fs.h>
+#include <xeno/types.h>
+//#include <xeno/ptrace.h>
+#include <xeno/lib.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <xeno/delay.h>
+#include <xeno/errno.h>
+//#include <xeno/fcntl.h>
+#include <xeno/in.h>
+#include <xeno/interrupt.h>
+#include <xeno/init.h>
+
+#include <xeno/netdevice.h>
+#include <xeno/etherdevice.h>
+
+#define NS8390_CORE
+#include "8390.h"
+
+#define BUG_83C690
+
+/* These are the operational function interfaces to board-specific
+ routines.
+ void reset_8390(struct net_device *dev)
+ Resets the board associated with DEV, including a hardware reset of
+ the 8390. This is only called when there is a transmit timeout, and
+ it is always followed by 8390_init().
+ void block_output(struct net_device *dev, int count, const unsigned char *buf,
+ int start_page)
+ Write the COUNT bytes of BUF to the packet buffer at START_PAGE. The
+ "page" value uses the 8390's 256-byte pages.
+ void get_8390_hdr(struct net_device *dev, struct e8390_hdr *hdr, int ring_page)
+ Read the 4 byte, page aligned 8390 header. *If* there is a
+ subsequent read, it will be of the rest of the packet.
+ void block_input(struct net_device *dev, int count, struct sk_buff *skb, int ring_offset)
+ Read COUNT bytes from the packet buffer into the skb data area. Start
+ reading from RING_OFFSET, the address as the 8390 sees it. This will always
+ follow the read of the 8390 header.
+*/
+#define ei_reset_8390 (ei_local->reset_8390)
+#define ei_block_output (ei_local->block_output)
+#define ei_block_input (ei_local->block_input)
+#define ei_get_8390_hdr (ei_local->get_8390_hdr)
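+
+/* Roughly how a board driver wires these up in its probe routine (a
+ * sketch only; the my_* names are hypothetical):
+ *
+ *	struct ei_device *ei_local = (struct ei_device *)dev->priv;
+ *	ei_local->reset_8390   = &my_reset_8390;
+ *	ei_local->block_input  = &my_block_input;
+ *	ei_local->block_output = &my_block_output;
+ *	ei_local->get_8390_hdr = &my_get_8390_hdr;
+ */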
+
+/* use 0 for production, 1 for verification, >2 for debug */
+#ifndef ei_debug
+int ei_debug = 1;
+#endif
+
+/* Index to functions. */
+static void ei_tx_intr(struct net_device *dev);
+static void ei_tx_err(struct net_device *dev);
+static void ei_tx_timeout(struct net_device *dev);
+static void ei_receive(struct net_device *dev);
+static void ei_rx_overrun(struct net_device *dev);
+
+/* Routines generic to NS8390-based boards. */
+static void NS8390_trigger_send(struct net_device *dev, unsigned int length,
+ int start_page);
+static void set_multicast_list(struct net_device *dev);
+static void do_set_multicast_list(struct net_device *dev);
+
+/*
+ * SMP and the 8390 setup.
+ *
+ * The 8390 isn't exactly designed to be multithreaded on RX/TX. There is
+ * a page register that controls bank and packet buffer access. We guard
+ * this with ei_local->page_lock. Nobody should assume or set the page other
+ * than zero when the lock is not held. Lock holders must restore page 0
+ * before unlocking. Even pure readers must take the lock to protect in
+ * page 0.
+ *
+ * To make life difficult, the chip can also be very slow. We therefore can't
+ * just use spinlocks. For the longer lockups we disable the irq the device
+ * sits on and hold the lock. We must hold the lock because there is a dual
+ * processor case other than interrupts (get stats/set multicast list in
+ * parallel with each other and transmit).
+ *
+ * Note: in theory we can just disable the irq on the card _but_ there is
+ * a latency on SMP irq delivery. So we can easily go "disable irq" "sync irqs"
+ * enter lock, take the queued irq. So we waddle instead of flying.
+ *
+ * Finally by special arrangement for the purpose of being generally
+ * annoying the transmit function is called bh atomic. That places
+ * restrictions on the user context callers as disable_irq won't save
+ * them.
+ */
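+
+/* The slow-path pattern described above, as used by ei_tx_timeout()
+ * and ei_start_xmit() below:
+ *
+ *	disable_irq_nosync(dev->irq);	 (stop new IRQs, don't wait)
+ *	spin_lock(&ei_local->page_lock); (own the page register)
+ *	... slow 8390 programming, page 0 restored before unlock ...
+ *	spin_unlock(&ei_local->page_lock);
+ *	enable_irq(dev->irq);
+ */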
+
+
+
+/**
+ * ei_open - Open/initialize the board.
+ * @dev: network device to initialize
+ *
+ * This routine goes all-out, setting everything
+ * up anew at each open, even though many of these registers should only
+ * need to be set once at boot.
+ */
+int ei_open(struct net_device *dev)
+{
+ unsigned long flags;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+
+ /* This can't happen unless somebody forgot to call ethdev_init(). */
+ if (ei_local == NULL)
+ {
+ printk(KERN_EMERG "%s: ei_open passed a non-existent device!\n", dev->name);
+ return -ENXIO;
+ }
+
+ /* The card I/O part of the driver (e.g. 3c503) can hook a Tx timeout
+ wrapper that does e.g. media check & then calls ei_tx_timeout. */
+ if (dev->tx_timeout == NULL)
+ dev->tx_timeout = ei_tx_timeout;
+ if (dev->watchdog_timeo <= 0)
+ dev->watchdog_timeo = TX_TIMEOUT;
+
+ /*
+ * Grab the page lock so we own the register set, then call
+ * the init function.
+ */
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ NS8390_init(dev, 1);
+ /* Set the flag before we drop the lock. That way the IRQ arrives
+ after it's set and we get no silly warnings */
+ netif_start_queue(dev);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+ ei_local->irqlock = 0;
+ return 0;
+}
+
+/**
+ * ei_close - shut down network device
+ * @dev: network device to close
+ *
+ * Opposite of ei_open(). Only used when "ifconfig <devname> down" is done.
+ */
+int ei_close(struct net_device *dev)
+{
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ unsigned long flags;
+
+ /*
+ * Hold the page lock during close
+ */
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ NS8390_init(dev, 0);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+ netif_stop_queue(dev);
+ return 0;
+}
+
+/**
+ * ei_tx_timeout - handle transmit time out condition
+ * @dev: network device which has apparently fallen asleep
+ *
+ * Called by kernel when device never acknowledges a transmit has
+ * completed (or failed) - i.e. never posted a Tx related interrupt.
+ */
+
+void ei_tx_timeout(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ int txsr, isr, tickssofar = jiffies - dev->trans_start;
+ unsigned long flags;
+
+ ei_local->stat.tx_errors++;
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ txsr = inb(e8390_base+EN0_TSR);
+ isr = inb(e8390_base+EN0_ISR);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+
+ printk(KERN_DEBUG "%s: Tx timed out, %s TSR=%#2x, ISR=%#2x, t=%d.\n",
+ dev->name, (txsr & ENTSR_ABT) ? "excess collisions." :
+ (isr) ? "lost interrupt?" : "cable problem?", txsr, isr, tickssofar);
+
+ if (!isr && !ei_local->stat.tx_packets)
+ {
+ /* The 8390 probably hasn't gotten on the cable yet. */
+ ei_local->interface_num ^= 1; /* Try a different xcvr. */
+ }
+
+ /* Ugly but a reset can be slow, yet must be protected */
+
+ disable_irq_nosync(dev->irq);
+ spin_lock(&ei_local->page_lock);
+
+ /* Try to restart the card. Perhaps the user has fixed something. */
+ ei_reset_8390(dev);
+ NS8390_init(dev, 1);
+
+ spin_unlock(&ei_local->page_lock);
+ enable_irq(dev->irq);
+ netif_wake_queue(dev);
+}
+
+/**
+ * ei_start_xmit - begin packet transmission
+ * @skb: packet to be sent
+ * @dev: network device to which packet is sent
+ *
+ * Sends a packet to an 8390 network device.
+ */
+
+static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ int length, send_length, output_page;
+ unsigned long flags;
+
+ length = skb->len;
+
+ /* Mask interrupts from the ethercard.
+ SMP: We have to grab the lock here, otherwise the IRQ handler
+ on another CPU can flip the window and race the IRQ mask set. We end
+ up trashing the mcast filter, not disabling irqs, if we don't lock */
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ outb_p(0x00, e8390_base + EN0_IMR);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+
+
+ /*
+ * Slow phase with lock held.
+ */
+
+ disable_irq_nosync(dev->irq);
+
+ spin_lock(&ei_local->page_lock);
+
+ ei_local->irqlock = 1;
+
+ send_length = ETH_ZLEN < length ? length : ETH_ZLEN;
+
+#ifdef EI_PINGPONG
+
+ /*
+ * We have two Tx slots available for use. Find the first free
+ * slot, and then perform some sanity checks. With two Tx bufs,
+ * you get very close to transmitting back-to-back packets. With
+ * only one Tx buf, the transmitter sits idle while you reload the
+ * card, leaving a substantial gap between each transmitted packet.
+ */
+
+ if (ei_local->tx1 == 0)
+ {
+ output_page = ei_local->tx_start_page;
+ ei_local->tx1 = send_length;
+ if (ei_debug && ei_local->tx2 > 0)
+ printk(KERN_DEBUG "%s: idle transmitter tx2=%d, lasttx=%d, txing=%d.\n",
+ dev->name, ei_local->tx2, ei_local->lasttx, ei_local->txing);
+ }
+ else if (ei_local->tx2 == 0)
+ {
+ output_page = ei_local->tx_start_page + TX_1X_PAGES;
+ ei_local->tx2 = send_length;
+ if (ei_debug && ei_local->tx1 > 0)
+ printk(KERN_DEBUG "%s: idle transmitter, tx1=%d, lasttx=%d, txing=%d.\n",
+ dev->name, ei_local->tx1, ei_local->lasttx, ei_local->txing);
+ }
+ else
+ { /* We should never get here. */
+ if (ei_debug)
+ printk(KERN_DEBUG "%s: No Tx buffers free! tx1=%d tx2=%d last=%d\n",
+ dev->name, ei_local->tx1, ei_local->tx2, ei_local->lasttx);
+ ei_local->irqlock = 0;
+ netif_stop_queue(dev);
+ outb_p(ENISR_ALL, e8390_base + EN0_IMR);
+ spin_unlock(&ei_local->page_lock);
+ enable_irq(dev->irq);
+ ei_local->stat.tx_errors++;
+ return 1;
+ }
+
+ /*
+ * Okay, now upload the packet and trigger a send if the transmitter
+ * isn't already sending. If it is busy, the interrupt handler will
+ * trigger the send later, upon receiving a Tx done interrupt.
+ */
+
+ ei_block_output(dev, length, skb->data, output_page);
+ if (! ei_local->txing)
+ {
+ ei_local->txing = 1;
+ NS8390_trigger_send(dev, send_length, output_page);
+ dev->trans_start = jiffies;
+ if (output_page == ei_local->tx_start_page)
+ {
+ ei_local->tx1 = -1;
+ ei_local->lasttx = -1;
+ }
+ else
+ {
+ ei_local->tx2 = -1;
+ ei_local->lasttx = -2;
+ }
+ }
+ else ei_local->txqueue++;
+
+ if (ei_local->tx1 && ei_local->tx2)
+ netif_stop_queue(dev);
+ else
+ netif_start_queue(dev);
+
+#else /* EI_PINGPONG */
+
+ /*
+ * Only one Tx buffer in use. You need two Tx bufs to come close to
+ * back-to-back transmits. Expect a 20 -> 25% performance hit on
+ * reasonable hardware if you only use one Tx buffer.
+ */
+
+ ei_block_output(dev, length, skb->data, ei_local->tx_start_page);
+ ei_local->txing = 1;
+ NS8390_trigger_send(dev, send_length, ei_local->tx_start_page);
+ dev->trans_start = jiffies;
+ netif_stop_queue(dev);
+
+#endif /* EI_PINGPONG */
+
+ /* Turn 8390 interrupts back on. */
+ ei_local->irqlock = 0;
+ outb_p(ENISR_ALL, e8390_base + EN0_IMR);
+
+ spin_unlock(&ei_local->page_lock);
+ enable_irq(dev->irq);
+
+ dev_kfree_skb (skb);
+ ei_local->stat.tx_bytes += send_length;
+
+ return 0;
+}
+
+/**
+ * ei_interrupt - handle the interrupts from an 8390
+ * @irq: interrupt number
+ * @dev_id: a pointer to the net_device
+ * @regs: unused
+ *
+ * Handle the ether interface interrupts. We pull packets from
+ * the 8390 via the card-specific functions and fire them at the networking
+ * stack. We also handle transmit completions and wake the transmit path if
+ * necessary, update the counters, and do other housekeeping as
+ * needed.
+ */
+
+void ei_interrupt(int irq, void *dev_id, struct pt_regs * regs)
+{
+ struct net_device *dev = dev_id;
+ long e8390_base;
+ int interrupts, nr_serviced = 0;
+ struct ei_device *ei_local;
+
+ if (dev == NULL)
+ {
+ printk ("net_interrupt(): irq %d for unknown device.\n", irq);
+ return;
+ }
+
+ e8390_base = dev->base_addr;
+ ei_local = (struct ei_device *) dev->priv;
+
+ /*
+ * Protect the irq test too.
+ */
+
+ spin_lock(&ei_local->page_lock);
+
+ if (ei_local->irqlock)
+ {
+#if 1 /* This might just be an interrupt for a PCI device sharing this line */
+ /* The "irqlock" check is only for testing. */
+ printk(ei_local->irqlock
+ ? "%s: Interrupted while interrupts are masked! isr=%#2x imr=%#2x.\n"
+ : "%s: Reentering the interrupt handler! isr=%#2x imr=%#2x.\n",
+ dev->name, inb_p(e8390_base + EN0_ISR),
+ inb_p(e8390_base + EN0_IMR));
+#endif
+ spin_unlock(&ei_local->page_lock);
+ return;
+ }
+
+ /* Change to page 0 and read the intr status reg. */
+ outb_p(E8390_NODMA+E8390_PAGE0, e8390_base + E8390_CMD);
+ if (ei_debug > 3)
+ printk(KERN_DEBUG "%s: interrupt(isr=%#2.2x).\n", dev->name,
+ inb_p(e8390_base + EN0_ISR));
+
+ /* !!Assumption!! -- we stay in page 0. Don't break this. */
+ while ((interrupts = inb_p(e8390_base + EN0_ISR)) != 0
+ && ++nr_serviced < MAX_SERVICE)
+ {
+ if (!netif_running(dev)) {
+ printk(KERN_WARNING "%s: interrupt from stopped card\n", dev->name);
+ /* rmk - acknowledge the interrupts */
+ outb_p(interrupts, e8390_base + EN0_ISR);
+ interrupts = 0;
+ break;
+ }
+ if (interrupts & ENISR_OVER)
+ ei_rx_overrun(dev);
+ else if (interrupts & (ENISR_RX+ENISR_RX_ERR))
+ {
+ /* Got a good (?) packet. */
+ ei_receive(dev);
+ }
+ /* Push the next to-transmit packet through. */
+ if (interrupts & ENISR_TX)
+ ei_tx_intr(dev);
+ else if (interrupts & ENISR_TX_ERR)
+ ei_tx_err(dev);
+
+ if (interrupts & ENISR_COUNTERS)
+ {
+ ei_local->stat.rx_frame_errors += inb_p(e8390_base + EN0_COUNTER0);
+ ei_local->stat.rx_crc_errors += inb_p(e8390_base + EN0_COUNTER1);
+ ei_local->stat.rx_missed_errors+= inb_p(e8390_base + EN0_COUNTER2);
+ outb_p(ENISR_COUNTERS, e8390_base + EN0_ISR); /* Ack intr. */
+ }
+
+ /* Ignore any RDC interrupts that make it back to here. */
+ if (interrupts & ENISR_RDC)
+ {
+ outb_p(ENISR_RDC, e8390_base + EN0_ISR);
+ }
+
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, e8390_base + E8390_CMD);
+ }
+
+ if (interrupts && ei_debug)
+ {
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, e8390_base + E8390_CMD);
+ if (nr_serviced >= MAX_SERVICE)
+ {
+ /* 0xFF is valid for a card removal */
+ if(interrupts!=0xFF)
+ printk(KERN_WARNING "%s: Too much work at interrupt, status %#2.2x\n",
+ dev->name, interrupts);
+ outb_p(ENISR_ALL, e8390_base + EN0_ISR); /* Ack. most intrs. */
+ } else {
+ printk(KERN_WARNING "%s: unknown interrupt %#2x\n", dev->name, interrupts);
+ outb_p(0xff, e8390_base + EN0_ISR); /* Ack. all intrs. */
+ }
+ }
+ spin_unlock(&ei_local->page_lock);
+ return;
+}
+
+/**
+ * ei_tx_err - handle transmitter error
+ * @dev: network device which threw the exception
+ *
+ * A transmitter error has happened. Most likely excess collisions (which
+ * is a fairly normal condition). If the error is one where the Tx will
+ * have been aborted, we try and send another one right away, instead of
+ * letting the failed packet sit and collect dust in the Tx buffer. This
+ * is a much better solution as it avoids kernel based Tx timeouts, and
+ * an unnecessary card reset.
+ *
+ * Called with lock held.
+ */
+
+static void ei_tx_err(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ unsigned char txsr = inb_p(e8390_base+EN0_TSR);
+ unsigned char tx_was_aborted = txsr & (ENTSR_ABT+ENTSR_FU);
+
+#ifdef VERBOSE_ERROR_DUMP
+ printk(KERN_DEBUG "%s: transmitter error (%#2x): ", dev->name, txsr);
+ if (txsr & ENTSR_ABT)
+ printk("excess-collisions ");
+ if (txsr & ENTSR_ND)
+ printk("non-deferral ");
+ if (txsr & ENTSR_CRS)
+ printk("lost-carrier ");
+ if (txsr & ENTSR_FU)
+ printk("FIFO-underrun ");
+ if (txsr & ENTSR_CDH)
+ printk("lost-heartbeat ");
+ printk("\n");
+#endif
+
+ outb_p(ENISR_TX_ERR, e8390_base + EN0_ISR); /* Ack intr. */
+
+ if (tx_was_aborted)
+ ei_tx_intr(dev);
+ else
+ {
+ ei_local->stat.tx_errors++;
+ if (txsr & ENTSR_CRS) ei_local->stat.tx_carrier_errors++;
+ if (txsr & ENTSR_CDH) ei_local->stat.tx_heartbeat_errors++;
+ if (txsr & ENTSR_OWC) ei_local->stat.tx_window_errors++;
+ }
+}
+
+/**
+ * ei_tx_intr - transmit interrupt handler
+ * @dev: network device for which tx intr is handled
+ *
+ * We have finished a transmit: check for errors and then trigger the next
+ * packet to be sent. Called with lock held.
+ */
+
+static void ei_tx_intr(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ int status = inb(e8390_base + EN0_TSR);
+
+ outb_p(ENISR_TX, e8390_base + EN0_ISR); /* Ack intr. */
+
+#ifdef EI_PINGPONG
+
+ /*
+ * There are two Tx buffers, see which one finished, and trigger
+ * the send of another one if it exists.
+ */
+ ei_local->txqueue--;
+
+ if (ei_local->tx1 < 0)
+ {
+ if (ei_local->lasttx != 1 && ei_local->lasttx != -1)
+ printk(KERN_ERR "%s: bogus last_tx_buffer %d, tx1=%d.\n",
+ ei_local->name, ei_local->lasttx, ei_local->tx1);
+ ei_local->tx1 = 0;
+ if (ei_local->tx2 > 0)
+ {
+ ei_local->txing = 1;
+ NS8390_trigger_send(dev, ei_local->tx2, ei_local->tx_start_page + 6);
+ dev->trans_start = jiffies;
+ ei_local->tx2 = -1,
+ ei_local->lasttx = 2;
+ }
+ else ei_local->lasttx = 20, ei_local->txing = 0;
+ }
+ else if (ei_local->tx2 < 0)
+ {
+ if (ei_local->lasttx != 2 && ei_local->lasttx != -2)
+ printk("%s: bogus last_tx_buffer %d, tx2=%d.\n",
+ ei_local->name, ei_local->lasttx, ei_local->tx2);
+ ei_local->tx2 = 0;
+ if (ei_local->tx1 > 0)
+ {
+ ei_local->txing = 1;
+ NS8390_trigger_send(dev, ei_local->tx1, ei_local->tx_start_page);
+ dev->trans_start = jiffies;
+ ei_local->tx1 = -1;
+ ei_local->lasttx = 1;
+ }
+ else
+ ei_local->lasttx = 10, ei_local->txing = 0;
+ }
+// else printk(KERN_WARNING "%s: unexpected TX-done interrupt, lasttx=%d.\n",
+// dev->name, ei_local->lasttx);
+
+#else /* EI_PINGPONG */
+ /*
+ * Single Tx buffer: mark it free so another packet can be loaded.
+ */
+ ei_local->txing = 0;
+#endif
+
+ /* Minimize Tx latency: update the statistics after we restart TXing. */
+ if (status & ENTSR_COL)
+ ei_local->stat.collisions++;
+ if (status & ENTSR_PTX)
+ ei_local->stat.tx_packets++;
+ else
+ {
+ ei_local->stat.tx_errors++;
+ if (status & ENTSR_ABT)
+ {
+ ei_local->stat.tx_aborted_errors++;
+ ei_local->stat.collisions += 16;
+ }
+ if (status & ENTSR_CRS)
+ ei_local->stat.tx_carrier_errors++;
+ if (status & ENTSR_FU)
+ ei_local->stat.tx_fifo_errors++;
+ if (status & ENTSR_CDH)
+ ei_local->stat.tx_heartbeat_errors++;
+ if (status & ENTSR_OWC)
+ ei_local->stat.tx_window_errors++;
+ }
+ netif_wake_queue(dev);
+}
+
+/**
+ * ei_receive - receive some packets
+ * @dev: network device with which receive will be run
+ *
+ * We have a good packet(s), get it/them out of the buffers.
+ * Called with lock held.
+ */
+
+static void ei_receive(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ unsigned char rxing_page, this_frame, next_frame;
+ unsigned short current_offset;
+ int rx_pkt_count = 0;
+ struct e8390_pkt_hdr rx_frame;
+ int num_rx_pages = ei_local->stop_page-ei_local->rx_start_page;
+
+ while (++rx_pkt_count < 10)
+ {
+ int pkt_len, pkt_stat;
+
+ /* Get the rx page (incoming packet pointer). */
+ outb_p(E8390_NODMA+E8390_PAGE1, e8390_base + E8390_CMD);
+ rxing_page = inb_p(e8390_base + EN1_CURPAG);
+ outb_p(E8390_NODMA+E8390_PAGE0, e8390_base + E8390_CMD);
+
+ /* Remove one frame from the ring. Boundary is always a page behind. */
+ this_frame = inb_p(e8390_base + EN0_BOUNDARY) + 1;
+ if (this_frame >= ei_local->stop_page)
+ this_frame = ei_local->rx_start_page;
+
+ /* Someday we'll omit the previous check, iff we never get this message.
+ (There is at least one clone claimed to have a problem.)
+
+ Keep quiet if it looks like a card removal. One problem here
+ is that some clones crash in roughly the same way.
+ */
+ if (ei_debug > 0 && this_frame != ei_local->current_page && (this_frame!=0x0 || rxing_page!=0xFF))
+ printk(KERN_ERR "%s: mismatched read page pointers %2x vs %2x.\n",
+ dev->name, this_frame, ei_local->current_page);
+
+ if (this_frame == rxing_page) /* Read all the frames? */
+ break; /* Done for now */
+
+ current_offset = this_frame << 8;
+ ei_get_8390_hdr(dev, &rx_frame, this_frame);
+
+ pkt_len = rx_frame.count - sizeof(struct e8390_pkt_hdr);
+ pkt_stat = rx_frame.status;
+
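+ /* The frame (4-byte 8390 header + data) spans 1 + ((pkt_len+4)>>8) 256-byte ring pages. */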
+ next_frame = this_frame + 1 + ((pkt_len+4)>>8);
+
+ /* Check for bogosity warned by 3c503 book: the status byte is never
+ written. This happened a lot during testing! This code should be
+ cleaned up someday. */
+ if (rx_frame.next != next_frame
+ && rx_frame.next != next_frame + 1
+ && rx_frame.next != next_frame - num_rx_pages
+ && rx_frame.next != next_frame + 1 - num_rx_pages) {
+ ei_local->current_page = rxing_page;
+ outb(ei_local->current_page-1, e8390_base+EN0_BOUNDARY);
+ ei_local->stat.rx_errors++;
+ continue;
+ }
+
+ if (pkt_len < 60 || pkt_len > 1518)
+ {
+ if (ei_debug)
+ printk(KERN_DEBUG "%s: bogus packet size: %d, status=%#2x nxpg=%#2x.\n",
+ dev->name, rx_frame.count, rx_frame.status,
+ rx_frame.next);
+ ei_local->stat.rx_errors++;
+ ei_local->stat.rx_length_errors++;
+ }
+ else if ((pkt_stat & 0x0F) == ENRSR_RXOK)
+ {
+ struct sk_buff *skb;
+
+ skb = dev_alloc_skb(pkt_len+2);
+ if (skb == NULL)
+ {
+ if (ei_debug > 1)
+ printk(KERN_DEBUG "%s: Couldn't allocate a sk_buff of size %d.\n",
+ dev->name, pkt_len);
+ ei_local->stat.rx_dropped++;
+ break;
+ }
+ else
+ {
+ skb_reserve(skb,2); /* IP headers on 16 byte boundaries */
+ skb->dev = dev;
+ skb_put(skb, pkt_len); /* Make room */
+ ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame));
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+ ei_local->stat.rx_packets++;
+ ei_local->stat.rx_bytes += pkt_len;
+ if (pkt_stat & ENRSR_PHY)
+ ei_local->stat.multicast++;
+ }
+ }
+ else
+ {
+ if (ei_debug)
+ printk(KERN_DEBUG "%s: bogus packet: status=%#2x nxpg=%#2x size=%d\n",
+ dev->name, rx_frame.status, rx_frame.next,
+ rx_frame.count);
+ ei_local->stat.rx_errors++;
+ /* NB: The NIC counts CRC, frame and missed errors. */
+ if (pkt_stat & ENRSR_FO)
+ ei_local->stat.rx_fifo_errors++;
+ }
+ next_frame = rx_frame.next;
+
+ /* This _should_ never happen: it's here for avoiding bad clones. */
+ if (next_frame >= ei_local->stop_page) {
+ printk("%s: next frame inconsistency, %#2x\n", dev->name,
+ next_frame);
+ next_frame = ei_local->rx_start_page;
+ }
+ ei_local->current_page = next_frame;
+ outb_p(next_frame-1, e8390_base+EN0_BOUNDARY);
+ }
+
+ /* We used to also ack ENISR_OVER here, but that would sometimes mask
+ a real overrun, leaving the 8390 in a stopped state with rec'vr off. */
+ outb_p(ENISR_RX+ENISR_RX_ERR, e8390_base+EN0_ISR);
+ return;
+}
+
+/**
+ * ei_rx_overrun - handle receiver overrun
+ * @dev: network device which threw exception
+ *
+ * We have a receiver overrun: we have to kick the 8390 to get it started
+ * again. Problem is that you have to kick it exactly as NS prescribes in
+ * the updated datasheets, or "the NIC may act in an unpredictable manner."
+ * This includes causing "the NIC to defer indefinitely when it is stopped
+ * on a busy network." Ugh.
+ * Called with lock held. Don't call this with the interrupts off or your
+ * computer will hate you - it takes 10ms or so.
+ */
+
+static void ei_rx_overrun(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ unsigned char was_txing, must_resend = 0;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+
+ /*
+ * Record whether a Tx was in progress and then issue the
+ * stop command.
+ */
+ was_txing = inb_p(e8390_base+E8390_CMD) & E8390_TRANS;
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_STOP, e8390_base+E8390_CMD);
+
+ if (ei_debug > 1)
+ printk(KERN_DEBUG "%s: Receiver overrun.\n", dev->name);
+ ei_local->stat.rx_over_errors++;
+
+ /*
+ * Wait a full Tx time (1.2ms) + some guard time, NS says 1.6ms total.
+ * Early datasheets said to poll the reset bit, but now they say that
+ * it "is not a reliable indicator and subsequently should be ignored."
+ * We wait at least 10ms.
+ */
+
+ udelay(10*1000);
+
+ /*
+ * Reset RBCR[01] back to zero as per magic incantation.
+ */
+ outb_p(0x00, e8390_base+EN0_RCNTLO);
+ outb_p(0x00, e8390_base+EN0_RCNTHI);
+
+ /*
+ * See if any Tx was interrupted or not. According to NS, this
+ * step is vital, and skipping it will cause no end of havoc.
+ */
+
+ if (was_txing)
+ {
+ unsigned char tx_completed = inb_p(e8390_base+EN0_ISR) & (ENISR_TX+ENISR_TX_ERR);
+ if (!tx_completed)
+ must_resend = 1;
+ }
+
+ /*
+ * Have to enter loopback mode and then restart the NIC before
+ * you are allowed to slurp packets up off the ring.
+ */
+ outb_p(E8390_TXOFF, e8390_base + EN0_TXCR);
+ outb_p(E8390_NODMA + E8390_PAGE0 + E8390_START, e8390_base + E8390_CMD);
+
+ /*
+ * Clear the Rx ring of all the debris, and ack the interrupt.
+ */
+ ei_receive(dev);
+ outb_p(ENISR_OVER, e8390_base+EN0_ISR);
+
+ /*
+ * Leave loopback mode, and resend any packet that got stopped.
+ */
+ outb_p(E8390_TXCONFIG, e8390_base + EN0_TXCR);
+ if (must_resend)
+ outb_p(E8390_NODMA + E8390_PAGE0 + E8390_START + E8390_TRANS, e8390_base + E8390_CMD);
+}
+
+/*
+ * Collect the stats. This is called unlocked and from several contexts.
+ */
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+ long ioaddr = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ unsigned long flags;
+
+ /* If the card is stopped, just return the present stats. */
+ if (!netif_running(dev))
+ return &ei_local->stat;
+
+ spin_lock_irqsave(&ei_local->page_lock,flags);
+ /* Read the counter registers, assuming we are in page 0. */
+ ei_local->stat.rx_frame_errors += inb_p(ioaddr + EN0_COUNTER0);
+ ei_local->stat.rx_crc_errors += inb_p(ioaddr + EN0_COUNTER1);
+ ei_local->stat.rx_missed_errors+= inb_p(ioaddr + EN0_COUNTER2);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+
+ return &ei_local->stat;
+}
+
+/*
+ * Update the given Autodin II CRC value with another data byte.
+ */
+
+static inline u32 update_crc(u8 byte, u32 current_crc)
+{
+ int bit;
+ u8 ah = 0;
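+ /* Classic bit-serial CRC-32: on each step the feedback bit is the CRC
+ MSB XORed with the next data bit (data fed LSB-first, as on the
+ wire); ah merely ferries that bit into the polynomial test below. */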
+ for (bit=0; bit<8; bit++)
+ {
+ u8 carry = (current_crc>>31);
+ current_crc <<= 1;
+ ah = ((ah<<1) | carry) ^ byte;
+ if (ah&1)
+ current_crc ^= 0x04C11DB7; /* CRC polynomial */
+ ah >>= 1;
+ byte >>= 1;
+ }
+ return current_crc;
+}
+
+/*
+ * Form the 64 bit 8390 multicast table from the linked list of addresses
+ * associated with this dev structure.
+ */
+
+static inline void make_mc_bits(u8 *bits, struct net_device *dev)
+{
+ struct dev_mc_list *dmi;
+
+ for (dmi=dev->mc_list; dmi; dmi=dmi->next)
+ {
+ int i;
+ u32 crc;
+ if (dmi->dmi_addrlen != ETH_ALEN)
+ {
+ printk(KERN_INFO "%s: invalid multicast address length given.\n", dev->name);
+ continue;
+ }
+ crc = 0xffffffff; /* initial CRC value */
+ for (i=0; i<ETH_ALEN; i++)
+ crc = update_crc(dmi->dmi_addr[i], crc);
+ /*
+ * The 8390 uses the 6 most significant bits of the
+ * CRC to index the 64-bit multicast table: the top
+ * 3 bits select the byte, the next 3 the bit within it.
+ */
+ bits[crc>>29] |= (1<<((crc>>26)&7));
+ }
+}
+
+/**
+ * do_set_multicast_list - set/clear multicast filter
+ * @dev: net device for which multicast filter is adjusted
+ *
+ * Set or clear the multicast filter for this adaptor. May be called
+ * from a BH in 2.1.x. Must be called with lock held.
+ */
+
+static void do_set_multicast_list(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ int i;
+ struct ei_device *ei_local = (struct ei_device*)dev->priv;
+
+ if (!(dev->flags&(IFF_PROMISC|IFF_ALLMULTI)))
+ {
+ memset(ei_local->mcfilter, 0, 8);
+ if (dev->mc_list)
+ make_mc_bits(ei_local->mcfilter, dev);
+ }
+ else
+ memset(ei_local->mcfilter, 0xFF, 8); /* mcast set to accept-all */
+
+ /*
+ * DP8390 manuals don't specify any magic sequence for altering
+ * the multicast regs on an already running card. To be safe, we
+ * ensure multicast mode is off prior to loading up the new hash
+ * table. If this proves to be not enough, we can always resort
+ * to stopping the NIC, loading the table and then restarting.
+ *
+ * Bug Alert! The MC regs on the SMC 83C690 (SMC Elite and SMC
+ * Elite16) appear to be write-only. The NS 8390 data sheet lists
+ * them as r/w so this is a bug. The SMC 83C790 (SMC Ultra and
+ * Ultra32 EISA) appears to have this bug fixed.
+ */
+
+ if (netif_running(dev))
+ outb_p(E8390_RXCONFIG, e8390_base + EN0_RXCR);
+ outb_p(E8390_NODMA + E8390_PAGE1, e8390_base + E8390_CMD);
+ for(i = 0; i < 8; i++)
+ {
+ outb_p(ei_local->mcfilter[i], e8390_base + EN1_MULT_SHIFT(i));
+#ifndef BUG_83C690
+ if(inb_p(e8390_base + EN1_MULT_SHIFT(i))!=ei_local->mcfilter[i])
+ printk(KERN_ERR "Multicast filter read/write mismap %d\n",i);
+#endif
+ }
+ outb_p(E8390_NODMA + E8390_PAGE0, e8390_base + E8390_CMD);
+
+ if(dev->flags&IFF_PROMISC)
+ outb_p(E8390_RXCONFIG | 0x18, e8390_base + EN0_RXCR);
+ else if(dev->flags&IFF_ALLMULTI || dev->mc_list)
+ outb_p(E8390_RXCONFIG | 0x08, e8390_base + EN0_RXCR);
+ else
+ outb_p(E8390_RXCONFIG, e8390_base + EN0_RXCR);
+ }
+
+/*
+ * Called without lock held. This is invoked from user context and may
+ * be parallel to just about everything else. It's also fairly quick and
+ * not called too often. Must protect against both bh and irq users.
+ */
+
+static void set_multicast_list(struct net_device *dev)
+{
+ unsigned long flags;
+ struct ei_device *ei_local = (struct ei_device*)dev->priv;
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ do_set_multicast_list(dev);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+}
+
+/**
+ * ethdev_init - init rest of 8390 device struct
+ * @dev: network device structure to init
+ *
+ * Initialize the rest of the 8390 device structure. Do NOT __init
+ * this, as it is used by 8390 based modular drivers too.
+ */
+
+int ethdev_init(struct net_device *dev)
+{
+ if (ei_debug > 1)
+ printk(version);
+
+ if (dev->priv == NULL)
+ {
+ struct ei_device *ei_local;
+
+ dev->priv = kmalloc(sizeof(struct ei_device), GFP_KERNEL);
+ if (dev->priv == NULL)
+ return -ENOMEM;
+ memset(dev->priv, 0, sizeof(struct ei_device));
+ ei_local = (struct ei_device *)dev->priv;
+ spin_lock_init(&ei_local->page_lock);
+ }
+
+ dev->hard_start_xmit = &ei_start_xmit;
+ dev->get_stats = get_stats;
+ dev->set_multicast_list = &set_multicast_list;
+
+ ether_setup(dev);
+
+ return 0;
+}
+
+
+
+/* This page of functions should be 8390 generic */
+/* Follow National Semi's recommendations for initializing the "NIC". */
+
+/**
+ * NS8390_init - initialize 8390 hardware
+ * @dev: network device to initialize
+ * @startp: boolean. non-zero value to initiate chip processing
+ *
+ * Must be called with lock held.
+ */
+
+void NS8390_init(struct net_device *dev, int startp)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ int i;
+ int endcfg = ei_local->word16
+ ? (0x48 | ENDCFG_WTS | (ei_local->bigendian ? ENDCFG_BOS : 0))
+ : 0x48;
+
+ if(sizeof(struct e8390_pkt_hdr)!=4)
+ panic("8390.c: header struct mispacked\n");
+ /* Follow National Semi's recommendations for initing the DP83902. */
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_STOP, e8390_base+E8390_CMD); /* 0x21 */
+ outb_p(endcfg, e8390_base + EN0_DCFG); /* 0x48 or 0x49 */
+ /* Clear the remote byte count registers. */
+ outb_p(0x00, e8390_base + EN0_RCNTLO);
+ outb_p(0x00, e8390_base + EN0_RCNTHI);
+ /* Set to monitor and loopback mode -- this is vital! */
+ outb_p(E8390_RXOFF, e8390_base + EN0_RXCR); /* 0x20 */
+ outb_p(E8390_TXOFF, e8390_base + EN0_TXCR); /* 0x02 */
+ /* Set the transmit page and receive ring. */
+ outb_p(ei_local->tx_start_page, e8390_base + EN0_TPSR);
+ ei_local->tx1 = ei_local->tx2 = 0;
+ outb_p(ei_local->rx_start_page, e8390_base + EN0_STARTPG);
+ outb_p(ei_local->stop_page-1, e8390_base + EN0_BOUNDARY); /* 3c503 says 0x3f,NS0x26*/
+ ei_local->current_page = ei_local->rx_start_page; /* assert boundary+1 */
+ outb_p(ei_local->stop_page, e8390_base + EN0_STOPPG);
+ /* Clear the pending interrupts and mask. */
+ outb_p(0xFF, e8390_base + EN0_ISR);
+ outb_p(0x00, e8390_base + EN0_IMR);
+
+ /* Copy the station address into the DS8390 registers. */
+
+ outb_p(E8390_NODMA + E8390_PAGE1 + E8390_STOP, e8390_base+E8390_CMD); /* 0x61 */
+ for(i = 0; i < 6; i++)
+ {
+ outb_p(dev->dev_addr[i], e8390_base + EN1_PHYS_SHIFT(i));
+ if(inb_p(e8390_base + EN1_PHYS_SHIFT(i))!=dev->dev_addr[i])
+ printk(KERN_ERR "Hw. address read/write mismap %d\n",i);
+ }
+
+ outb_p(ei_local->rx_start_page, e8390_base + EN1_CURPAG);
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_STOP, e8390_base+E8390_CMD);
+
+ netif_start_queue(dev);
+ ei_local->tx1 = ei_local->tx2 = 0;
+ ei_local->txing = 0;
+
+ if (startp)
+ {
+ outb_p(0xff, e8390_base + EN0_ISR);
+ outb_p(ENISR_ALL, e8390_base + EN0_IMR);
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, e8390_base+E8390_CMD);
+ outb_p(E8390_TXCONFIG, e8390_base + EN0_TXCR); /* xmit on. */
+ /* 3c503 TechMan says rxconfig only after the NIC is started. */
+ outb_p(E8390_RXCONFIG, e8390_base + EN0_RXCR); /* rx on, */
+ do_set_multicast_list(dev); /* (re)load the mcast table */
+ }
+}
+
+/* Trigger a transmit start, assuming the length is valid.
+ Always called with the page lock held. */
+
+static void NS8390_trigger_send(struct net_device *dev, unsigned int length,
+ int start_page)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local __attribute__((unused)) = (struct ei_device *) dev->priv;
+
+ outb_p(E8390_NODMA+E8390_PAGE0, e8390_base+E8390_CMD);
+
+ if (inb_p(e8390_base) & E8390_TRANS)
+ {
+ printk(KERN_WARNING "%s: trigger_send() called with the transmitter busy.\n",
+ dev->name);
+ return;
+ }
+ outb_p(length & 0xff, e8390_base + EN0_TCNTLO);
+ outb_p(length >> 8, e8390_base + EN0_TCNTHI);
+ outb_p(start_page, e8390_base + EN0_TPSR);
+ outb_p(E8390_NODMA+E8390_TRANS+E8390_START, e8390_base+E8390_CMD);
+}
+
+EXPORT_SYMBOL(ei_open);
+EXPORT_SYMBOL(ei_close);
+EXPORT_SYMBOL(ei_interrupt);
+EXPORT_SYMBOL(ei_tx_timeout);
+EXPORT_SYMBOL(ethdev_init);
+EXPORT_SYMBOL(NS8390_init);
+
+#if defined(MODULE)
+
+int init_module(void)
+{
+ return 0;
+}
+
+void cleanup_module(void)
+{
+}
+
+#endif /* MODULE */
diff --git a/xen/drivers/net/ne/8390.h b/xen/drivers/net/ne/8390.h
new file mode 100644
index 0000000000..1a3be1775d
--- /dev/null
+++ b/xen/drivers/net/ne/8390.h
@@ -0,0 +1,197 @@
+/* Generic NS8390 register definitions. */
+/* This file is part of Donald Becker's 8390 drivers, and is distributed
+ under the same license. Auto-loading of 8390.o only in v2.2 - Paul G.
+ Some of these names and comments originated from the Crynwr
+ packet drivers, which are distributed under the GPL. */
+
+#ifndef _8390_h
+#define _8390_h
+
+#include <xeno/config.h>
+#include <xeno/if_ether.h>
+#include <xeno/ioport.h>
+#include <xeno/skbuff.h>
+
+#define TX_2X_PAGES 12
+#define TX_1X_PAGES 6
+
+/* Should always use two Tx slots to get back-to-back transmits. */
+#define EI_PINGPONG
+
+#ifdef EI_PINGPONG
+#define TX_PAGES TX_2X_PAGES
+#else
+#define TX_PAGES TX_1X_PAGES
+#endif
+
+#define ETHER_ADDR_LEN 6
+
+/* The 8390 specific per-packet-header format. */
+struct e8390_pkt_hdr {
+ unsigned char status; /* status */
+ unsigned char next; /* pointer to next packet. */
+ unsigned short count; /* header + packet length in bytes */
+};
+
+#ifdef notdef
+extern int ei_debug;
+#else
+#define ei_debug 1
+#endif
+
+#ifndef HAVE_AUTOIRQ
+/* From auto_irq.c */
+extern void autoirq_setup(int waittime);
+extern unsigned long autoirq_report(int waittime);
+#endif
+
+extern int ethdev_init(struct net_device *dev);
+extern void NS8390_init(struct net_device *dev, int startp);
+extern int ei_open(struct net_device *dev);
+extern int ei_close(struct net_device *dev);
+extern void ei_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+
+/* Most of these entries should be in 'struct net_device' (or most of the
+ things in there should be here!) */
+/* You have one of these per-board */
+struct ei_device {
+ const char *name;
+ void (*reset_8390)(struct net_device *);
+ void (*get_8390_hdr)(struct net_device *, struct e8390_pkt_hdr *, int);
+ void (*block_output)(struct net_device *, int, const unsigned char *, int);
+ void (*block_input)(struct net_device *, int, struct sk_buff *, int);
+ unsigned char mcfilter[8];
+ unsigned open:1;
+ unsigned word16:1; /* We have the 16-bit (vs 8-bit) version of the card. */
+ unsigned bigendian:1; /* 16-bit big endian mode. Do NOT */
+ /* set this on random 8390 clones! */
+ unsigned txing:1; /* Transmit Active */
+ unsigned irqlock:1; /* 8390's intrs disabled when '1'. */
+ unsigned dmaing:1; /* Remote DMA Active */
+ unsigned char tx_start_page, rx_start_page, stop_page;
+ unsigned char current_page; /* Read pointer in buffer */
+ unsigned char interface_num; /* Net port (AUI, 10bT.) to use. */
+ unsigned char txqueue; /* Tx Packet buffer queue length. */
+ short tx1, tx2; /* Packet lengths for ping-pong tx. */
+ short lasttx; /* Alpha version consistency check. */
+ unsigned char reg0; /* Register '0' in a WD8013 */
+ unsigned char reg5; /* Register '5' in a WD8013 */
+ unsigned char saved_irq; /* Original dev->irq value. */
+ struct net_device_stats stat; /* The new statistics table. */
+ u32 *reg_offset; /* Register mapping table */
+ spinlock_t page_lock; /* Page register locks */
+ unsigned long priv; /* Private field to store bus IDs etc. */
+};
+
+/* The maximum number of 8390 interrupt service routines called per IRQ. */
+#define MAX_SERVICE 12
+
+/* The maximum time waited (in jiffies) before assuming a Tx failed. (20ms) */
+#define TX_TIMEOUT (20*HZ/100)
+
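+/* Shorthand: access dev->priv as this device's ei_device structure. */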
+#define ei_status (*(struct ei_device *)(dev->priv))
+
+/* Some generic ethernet register configurations. */
+#define E8390_TX_IRQ_MASK 0xa /* For register EN0_ISR */
+#define E8390_RX_IRQ_MASK 0x5
+#define E8390_RXCONFIG 0x4 /* EN0_RXCR: broadcasts, no multicast, errors */
+#define E8390_RXOFF 0x20 /* EN0_RXCR: Accept no packets */
+#define E8390_TXCONFIG 0x00 /* EN0_TXCR: Normal transmit mode */
+#define E8390_TXOFF 0x02 /* EN0_TXCR: Transmitter off */
+
+/* Register accessed at EN_CMD, the 8390 base addr. */
+#define E8390_STOP 0x01 /* Stop and reset the chip */
+#define E8390_START 0x02 /* Start the chip, clear reset */
+#define E8390_TRANS 0x04 /* Transmit a frame */
+#define E8390_RREAD 0x08 /* Remote read */
+#define E8390_RWRITE 0x10 /* Remote write */
+#define E8390_NODMA 0x20 /* Remote DMA */
+#define E8390_PAGE0 0x00 /* Select page chip registers */
+#define E8390_PAGE1 0x40 /* using the two high-order bits */
+#define E8390_PAGE2 0x80 /* Page 3 is invalid. */
+
+/*
+ * Only generate indirect loads given a machine that needs them.
+ */
+
+#if defined(CONFIG_MAC) || defined(CONFIG_AMIGA_PCMCIA) || \
+ defined(CONFIG_ARIADNE2) || defined(CONFIG_ARIADNE2_MODULE) || \
+ defined(CONFIG_HYDRA) || defined(CONFIG_HYDRA_MODULE) || \
+ defined(CONFIG_ARM_ETHERH) || defined(CONFIG_ARM_ETHERH_MODULE)
+#define EI_SHIFT(x) (ei_local->reg_offset[x])
+#else
+#define EI_SHIFT(x) (x)
+#endif
+
+#define E8390_CMD EI_SHIFT(0x00) /* The command register (for all pages) */
+/* Page 0 register offsets. */
+#define EN0_CLDALO EI_SHIFT(0x01) /* Low byte of current local dma addr RD */
+#define EN0_STARTPG EI_SHIFT(0x01) /* Starting page of ring bfr WR */
+#define EN0_CLDAHI EI_SHIFT(0x02) /* High byte of current local dma addr RD */
+#define EN0_STOPPG EI_SHIFT(0x02) /* Ending page +1 of ring bfr WR */
+#define EN0_BOUNDARY EI_SHIFT(0x03) /* Boundary page of ring bfr RD WR */
+#define EN0_TSR EI_SHIFT(0x04) /* Transmit status reg RD */
+#define EN0_TPSR EI_SHIFT(0x04) /* Transmit starting page WR */
+#define EN0_NCR EI_SHIFT(0x05) /* Number of collision reg RD */
+#define EN0_TCNTLO EI_SHIFT(0x05) /* Low byte of tx byte count WR */
+#define EN0_FIFO EI_SHIFT(0x06) /* FIFO RD */
+#define EN0_TCNTHI EI_SHIFT(0x06) /* High byte of tx byte count WR */
+#define EN0_ISR EI_SHIFT(0x07) /* Interrupt status reg RD WR */
+#define EN0_CRDALO EI_SHIFT(0x08) /* low byte of current remote dma address RD */
+#define EN0_RSARLO EI_SHIFT(0x08) /* Remote start address reg 0 */
+#define EN0_CRDAHI EI_SHIFT(0x09) /* high byte, current remote dma address RD */
+#define EN0_RSARHI EI_SHIFT(0x09) /* Remote start address reg 1 */
+#define EN0_RCNTLO EI_SHIFT(0x0a) /* Remote byte count reg WR */
+#define EN0_RCNTHI EI_SHIFT(0x0b) /* Remote byte count reg WR */
+#define EN0_RSR EI_SHIFT(0x0c) /* rx status reg RD */
+#define EN0_RXCR EI_SHIFT(0x0c) /* RX configuration reg WR */
+#define EN0_TXCR EI_SHIFT(0x0d) /* TX configuration reg WR */
+#define EN0_COUNTER0 EI_SHIFT(0x0d) /* Rcv alignment error counter RD */
+#define EN0_DCFG EI_SHIFT(0x0e) /* Data configuration reg WR */
+#define EN0_COUNTER1 EI_SHIFT(0x0e) /* Rcv CRC error counter RD */
+#define EN0_IMR EI_SHIFT(0x0f) /* Interrupt mask reg WR */
+#define EN0_COUNTER2 EI_SHIFT(0x0f) /* Rcv missed frame error counter RD */
+
+/* Bits in EN0_ISR - Interrupt status register */
+#define ENISR_RX 0x01 /* Receiver, no error */
+#define ENISR_TX 0x02 /* Transmitter, no error */
+#define ENISR_RX_ERR 0x04 /* Receiver, with error */
+#define ENISR_TX_ERR 0x08 /* Transmitter, with error */
+#define ENISR_OVER 0x10 /* Receiver overwrote the ring */
+#define ENISR_COUNTERS 0x20 /* Counters need emptying */
+#define ENISR_RDC 0x40 /* remote dma complete */
+#define ENISR_RESET 0x80 /* Reset completed */
+#define ENISR_ALL 0x3f /* Interrupts we will enable */
+
+/* Bits in EN0_DCFG - Data config register */
+#define ENDCFG_WTS 0x01 /* word transfer mode selection */
+#define ENDCFG_BOS 0x02 /* byte order selection */
+
+/* Page 1 register offsets. */
+#define EN1_PHYS EI_SHIFT(0x01) /* This board's physical enet addr RD WR */
+#define EN1_PHYS_SHIFT(i) EI_SHIFT(i+1) /* Get and set mac address */
+#define EN1_CURPAG EI_SHIFT(0x07) /* Current memory page RD WR */
+#define EN1_MULT EI_SHIFT(0x08) /* Multicast filter mask array (8 bytes) RD WR */
+#define EN1_MULT_SHIFT(i) EI_SHIFT(8+i) /* Get and set multicast filter */
+
+/* Bits in received packet status byte and EN0_RSR*/
+#define ENRSR_RXOK 0x01 /* Received a good packet */
+#define ENRSR_CRC 0x02 /* CRC error */
+#define ENRSR_FAE 0x04 /* frame alignment error */
+#define ENRSR_FO 0x08 /* FIFO overrun */
+#define ENRSR_MPA 0x10 /* missed pkt */
+#define ENRSR_PHY 0x20 /* physical/multicast address */
+#define ENRSR_DIS 0x40 /* receiver disable. set in monitor mode */
+#define ENRSR_DEF 0x80 /* deferring */
+
+/* Transmitted packet status, EN0_TSR. */
+#define ENTSR_PTX 0x01 /* Packet transmitted without error */
+#define ENTSR_ND 0x02 /* The transmit wasn't deferred. */
+#define ENTSR_COL 0x04 /* The transmit collided at least once. */
+#define ENTSR_ABT 0x08 /* The transmit collided 16 times, and was deferred. */
+#define ENTSR_CRS 0x10 /* The carrier sense was lost. */
+#define ENTSR_FU 0x20 /* A "FIFO underrun" occurred during transmit. */
+#define ENTSR_CDH 0x40 /* The collision detect "heartbeat" signal was lost. */
+#define ENTSR_OWC 0x80 /* There was an out-of-window collision. */
+
+#endif /* _8390_h */
diff --git a/xen/drivers/net/ne/Makefile b/xen/drivers/net/ne/Makefile
new file mode 100644
index 0000000000..d1bcc12d87
--- /dev/null
+++ b/xen/drivers/net/ne/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o ne_drv.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/net/ne/ne.c b/xen/drivers/net/ne/ne.c
new file mode 100644
index 0000000000..f694fc107b
--- /dev/null
+++ b/xen/drivers/net/ne/ne.c
@@ -0,0 +1,685 @@
+/* ne.c: A general non-shared-memory NS8390 ethernet driver for linux. */
+/*
+ Written 1992-94 by Donald Becker.
+
+ Copyright 1993 United States Government as represented by the
+ Director, National Security Agency.
+
+ This software may be used and distributed according to the terms
+ of the GNU General Public License, incorporated herein by reference.
+
+ The author may be reached as becker@scyld.com, or C/O
+ Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403
+
+ This driver should work with many programmed-I/O 8390-based ethernet
+ boards. Currently it supports the NE1000, NE2000, many clones,
+ and some Cabletron products.
+
+ Changelog:
+
+ Paul Gortmaker : use ENISR_RDC to monitor Tx PIO uploads, made
+ sanity checks and bad clone support optional.
+ Paul Gortmaker : new reset code, reset card after probe at boot.
+ Paul Gortmaker : multiple card support for module users.
+ Paul Gortmaker : Support for PCI ne2k clones, similar to lance.c
+ Paul Gortmaker : Allow users with bad cards to avoid full probe.
+ Paul Gortmaker : PCI probe changes, more PCI cards supported.
+ rjohnson@analogic.com : Changed init order so an interrupt will only
+ occur after memory is allocated for dev->priv. Deallocated memory
+ last in cleanup_module().
+ Richard Guenther : Added support for ISAPnP cards
+ Paul Gortmaker : Discontinued PCI support - use ne2k-pci.c instead.
+
+*/
+
+/* Routines for the NatSemi-based designs (NE[12]000). */
+
+static const char version1[] =
+"ne.c:v1.10 9/23/94 Donald Becker (becker@scyld.com)\n";
+static const char version2[] =
+"Last modified Nov 1, 2000 by Paul Gortmaker\n";
+
+
+#include <xeno/module.h>
+#include <xeno/kernel.h>
+#include <xeno/sched.h>
+#include <xeno/errno.h>
+#include <xeno/init.h>
+#include <xeno/delay.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <xeno/netdevice.h>
+#include <xeno/etherdevice.h>
+#include "8390.h"
+
+/* Some defines that people can play with if so inclined. */
+
+/* Do we support clones that don't adhere to 14,15 of the SAprom? */
+#define SUPPORT_NE_BAD_CLONES
+
+/* Do we perform extra sanity checks on stuff ? */
+/* #define NE_SANITY_CHECK */
+
+/* Do we implement the read before write bugfix ? */
+/* #define NE_RW_BUGFIX */
+
+/* Do we have a non std. amount of memory? (in units of 256 byte pages) */
+/* #define PACKETBUF_MEMSIZE 0x40 */
+
+#ifdef SUPPORT_NE_BAD_CLONES
+/* A list of bad clones that we nonetheless recognize. */
+static struct { const char *name8, *name16; unsigned char SAprefix[4];}
+bad_clone_list[] __initdata = {
+ {"DE100", "DE200", {0x00, 0xDE, 0x01,}},
+ {"DE120", "DE220", {0x00, 0x80, 0xc8,}},
+ {"DFI1000", "DFI2000", {'D', 'F', 'I',}}, /* Original, eh? */
+ {"EtherNext UTP8", "EtherNext UTP16", {0x00, 0x00, 0x79}},
+ {"NE1000","NE2000-invalid", {0x00, 0x00, 0xd8}}, /* Ancient real NE1000. */
+ {"NN1000", "NN2000", {0x08, 0x03, 0x08}}, /* Outlaw no-name clone. */
+ {"4-DIM8","4-DIM16", {0x00,0x00,0x4d,}}, /* Outlaw 4-Dimension cards. */
+ {"Con-Intl_8", "Con-Intl_16", {0x00, 0x00, 0x24}}, /* Connect Int'nl */
+ {"ET-100","ET-200", {0x00, 0x45, 0x54}}, /* YANG and YA clone */
+ {"COMPEX","COMPEX16",{0x00,0x80,0x48}}, /* Broken ISA Compex cards */
+ {"E-LAN100", "E-LAN200", {0x00, 0x00, 0x5d}}, /* Broken ne1000 clones */
+ {"PCM-4823", "PCM-4823", {0x00, 0xc0, 0x6c}}, /* Broken Advantech MoBo */
+ {"REALTEK", "RTL8019", {0x00, 0x00, 0xe8}}, /* no-name with Realtek chip */
+ {"LCS-8834", "LCS-8836", {0x04, 0x04, 0x37}}, /* ShinyNet (SET) */
+ {0,}
+};
+#endif
+
+/* ---- No user-serviceable parts below ---- */
+
+#define NE_BASE (dev->base_addr)
+#define NE_CMD 0x00
+#define NE_DATAPORT 0x10 /* NatSemi-defined port window offset. */
+#define NE_RESET 0x1f /* Issue a read to reset, a write to clear. */
+#define NE_IO_EXTENT 0x20
+
+#define NE1SM_START_PG 0x20 /* First page of TX buffer */
+#define NE1SM_STOP_PG 0x40 /* Last page +1 of RX ring */
+#define NESM_START_PG 0x40 /* First page of TX buffer */
+#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */
+
+int ne_probe(struct net_device *dev);
+static int ne_probe1(struct net_device *dev, int ioaddr);
+
+static int ne_open(struct net_device *dev);
+static int ne_close(struct net_device *dev);
+
+static void ne_reset_8390(struct net_device *dev);
+static void ne_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr,
+ int ring_page);
+static void ne_block_input(struct net_device *dev, int count,
+ struct sk_buff *skb, int ring_offset);
+static void ne_block_output(struct net_device *dev, const int count,
+ const unsigned char *buf, const int start_page);
+
+
+/* Probe for various non-shared-memory ethercards.
+
+ NEx000-clone boards have a Station Address PROM (SAPROM) in the packet
+ buffer memory space. NE2000 clones have 0x57,0x57 in bytes 0x0e,0x0f of
+ the SAPROM, while other supposed NE2000 clones must be detected by their
+ SA prefix.
+
+ Reading the SAPROM from a word-wide card with the 8390 set in byte-wide
+ mode results in doubled values, which can be detected and compensated for.
+
+ The probe is also responsible for initializing the card and filling
+ in the 'dev' and 'ei_status' structures.
+
+ We use the minimum memory size for some ethercard product lines, iff we can't
+ distinguish models. You can increase the packet buffer size by setting
+ PACKETBUF_MEMSIZE. Reported Cabletron packet buffer locations are:
+ E1010 starts at 0x100 and ends at 0x2000.
+ E1010-x starts at 0x100 and ends at 0x8000. ("-x" means "more memory")
+ E2010 starts at 0x100 and ends at 0x4000.
+ E2010-x starts at 0x100 and ends at 0xffff. */
+
+int __init ne_probe(struct net_device *dev)
+{
+ unsigned int base_addr = dev->base_addr;
+
+ SET_MODULE_OWNER(dev);
+
+ /* First check any supplied i/o locations. User knows best. <cough> */
+ if (base_addr > 0x1ff) /* Check a single specified location. */
+ return ne_probe1(dev, base_addr);
+
+ return -ENODEV;
+}
+
+static int __init ne_probe1(struct net_device *dev, int ioaddr)
+{
+ int i;
+ unsigned char SA_prom[32];
+ int wordlength = 2;
+ const char *name = NULL;
+ int start_page, stop_page;
+ int neX000, ctron, copam, bad_card;
+ int reg0, ret;
+ static unsigned version_printed;
+
+ if (!request_region(ioaddr, NE_IO_EXTENT, dev->name))
+ return -EBUSY;
+
+ reg0 = inb_p(ioaddr);
+ if (reg0 == 0xFF) {
+ ret = -ENODEV;
+ goto err_out;
+ }
+
+ /* Do a preliminary verification that we have a 8390. */
+ {
+ int regd;
+ outb_p(E8390_NODMA+E8390_PAGE1+E8390_STOP, ioaddr + E8390_CMD);
+ regd = inb_p(ioaddr + 0x0d);
+ outb_p(0xff, ioaddr + 0x0d);
+ outb_p(E8390_NODMA+E8390_PAGE0, ioaddr + E8390_CMD);
+ inb_p(ioaddr + EN0_COUNTER0); /* Clear the counter by reading. */
+ if (inb_p(ioaddr + EN0_COUNTER0) != 0) {
+ outb_p(reg0, ioaddr);
+ outb_p(regd, ioaddr + 0x0d); /* Restore the old values. */
+ ret = -ENODEV;
+ goto err_out;
+ }
+ }
+
+ if (ei_debug && version_printed++ == 0)
+ printk(KERN_INFO "%s" KERN_INFO "%s", version1, version2);
+
+ printk(KERN_INFO "NE*000 ethercard probe at %#3x:", ioaddr);
+
+ /* A user with a poor card that fails to ack the reset, or that
+ does not have a valid 0x57,0x57 signature can still use this
+ without having to recompile. Specifying an i/o address along
+ with an otherwise unused dev->mem_end value of "0xBAD" will
+ cause the driver to skip these parts of the probe. */
+
+ bad_card = ((dev->base_addr != 0) && (dev->mem_end == 0xbad));
+
+ /* Reset card. Who knows what dain-bramaged state it was left in. */
+
+ {
+ unsigned long reset_start_time = jiffies;
+
+ /* DON'T change these to inb_p/outb_p or reset will fail on clones. */
+ outb(inb(ioaddr + NE_RESET), ioaddr + NE_RESET);
+
+ while ((inb_p(ioaddr + EN0_ISR) & ENISR_RESET) == 0)
+ if (jiffies - reset_start_time > 2*HZ/100) {
+ if (bad_card) {
+ printk(" (warning: no reset ack)");
+ break;
+ } else {
+ printk(" not found (no reset ack).\n");
+ ret = -ENODEV;
+ goto err_out;
+ }
+ }
+
+ outb_p(0xff, ioaddr + EN0_ISR); /* Ack all intr. */
+ }
+
+ /* Read the 16 bytes of station address PROM.
+ We must first initialize registers, similar to NS8390_init(eifdev, 0).
+ We can't reliably read the SAPROM address without this.
+ (I learned the hard way!). */
+ {
+ struct {unsigned char value, offset; } program_seq[] =
+ {
+ {E8390_NODMA+E8390_PAGE0+E8390_STOP, E8390_CMD}, /* Select page 0*/
+ {0x48, EN0_DCFG}, /* Set byte-wide (0x48) access. */
+ {0x00, EN0_RCNTLO}, /* Clear the count regs. */
+ {0x00, EN0_RCNTHI},
+ {0x00, EN0_IMR}, /* Mask completion irq. */
+ {0xFF, EN0_ISR},
+ {E8390_RXOFF, EN0_RXCR}, /* 0x20 Set to monitor */
+ {E8390_TXOFF, EN0_TXCR}, /* 0x02 and loopback mode. */
+ {32, EN0_RCNTLO},
+ {0x00, EN0_RCNTHI},
+ {0x00, EN0_RSARLO}, /* DMA starting at 0x0000. */
+ {0x00, EN0_RSARHI},
+ {E8390_RREAD+E8390_START, E8390_CMD},
+ };
+
+ for (i = 0; i < sizeof(program_seq)/sizeof(program_seq[0]); i++)
+ outb_p(program_seq[i].value, ioaddr + program_seq[i].offset);
+
+ }
+ for(i = 0; i < 32 /*sizeof(SA_prom)*/; i+=2) {
+ SA_prom[i] = inb(ioaddr + NE_DATAPORT);
+ SA_prom[i+1] = inb(ioaddr + NE_DATAPORT);
+ if (SA_prom[i] != SA_prom[i+1])
+ wordlength = 1;
+ }
+
+ if (wordlength == 2)
+ {
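+ /* Byte-mode reads of a word-wide PROM return every byte doubled,
+ so the real byte i sits at offset i+i (not a typo for i+1). */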
+ for (i = 0; i < 16; i++)
+ SA_prom[i] = SA_prom[i+i];
+ /* We must set the 8390 for word mode. */
+ outb_p(0x49, ioaddr + EN0_DCFG);
+ start_page = NESM_START_PG;
+ stop_page = NESM_STOP_PG;
+ } else {
+ start_page = NE1SM_START_PG;
+ stop_page = NE1SM_STOP_PG;
+ }
+
+ neX000 = (SA_prom[14] == 0x57 && SA_prom[15] == 0x57);
+ ctron = (SA_prom[0] == 0x00 && SA_prom[1] == 0x00 && SA_prom[2] == 0x1d);
+ copam = (SA_prom[14] == 0x49 && SA_prom[15] == 0x00);
+
+ /* Set up the rest of the parameters. */
+ if (neX000 || bad_card || copam) {
+ name = (wordlength == 2) ? "NE2000" : "NE1000";
+ }
+ else if (ctron)
+ {
+ name = (wordlength == 2) ? "Ctron-8" : "Ctron-16";
+ start_page = 0x01;
+ stop_page = (wordlength == 2) ? 0x40 : 0x20;
+ }
+ else
+ {
+#ifdef SUPPORT_NE_BAD_CLONES
+ /* Ack! Well, there might be a *bad* NE*000 clone there.
+ Check for total bogus addresses. */
+ for (i = 0; bad_clone_list[i].name8; i++)
+ {
+ if (SA_prom[0] == bad_clone_list[i].SAprefix[0] &&
+ SA_prom[1] == bad_clone_list[i].SAprefix[1] &&
+ SA_prom[2] == bad_clone_list[i].SAprefix[2])
+ {
+ if (wordlength == 2)
+ {
+ name = bad_clone_list[i].name16;
+ } else {
+ name = bad_clone_list[i].name8;
+ }
+ break;
+ }
+ }
+ if (bad_clone_list[i].name8 == NULL)
+ {
+ printk(" not found (invalid signature %2.2x %2.2x).\n",
+ SA_prom[14], SA_prom[15]);
+ ret = -ENXIO;
+ goto err_out;
+ }
+#else
+ printk(" not found.\n");
+ ret = -ENXIO;
+ goto err_out;
+#endif
+ }
+
+ if (dev->irq < 2)
+ {
+ unsigned long cookie = probe_irq_on();
+ outb_p(0x50, ioaddr + EN0_IMR); /* Enable one interrupt. */
+ outb_p(0x00, ioaddr + EN0_RCNTLO);
+ outb_p(0x00, ioaddr + EN0_RCNTHI);
+ outb_p(E8390_RREAD+E8390_START, ioaddr); /* Trigger it... */
+ mdelay(10); /* wait 10ms for interrupt to propagate */
+ outb_p(0x00, ioaddr + EN0_IMR); /* Mask it again. */
+ dev->irq = probe_irq_off(cookie);
+ if (ei_debug > 2)
+ printk(" autoirq is %d\n", dev->irq);
+ } else if (dev->irq == 2)
+ /* Fixup for users that don't know that IRQ 2 is really IRQ 9,
+ or don't know which one to set. */
+ dev->irq = 9;
+
+ if (! dev->irq) {
+ printk(" failed to detect IRQ line.\n");
+ ret = -EAGAIN;
+ goto err_out;
+ }
+
+ /* Allocate dev->priv and fill in 8390 specific dev fields. */
+ if (ethdev_init(dev))
+ {
+ printk (" unable to get memory for dev->priv.\n");
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ /* Snarf the interrupt now. There's no point in waiting since we cannot
+ share and the board will usually be enabled. */
+ ret = request_irq(dev->irq, ei_interrupt, 0, name, dev);
+ if (ret) {
+ printk (" unable to get IRQ %d (errno=%d).\n", dev->irq, ret);
+ goto err_out_kfree;
+ }
+
+ dev->base_addr = ioaddr;
+
+ for(i = 0; i < ETHER_ADDR_LEN; i++) {
+ printk(" %2.2x", SA_prom[i]);
+ dev->dev_addr[i] = SA_prom[i];
+ }
+
+ printk("\n%s: %s found at %#x, using IRQ %d.\n",
+ dev->name, name, ioaddr, dev->irq);
+
+ ei_status.name = name;
+ ei_status.tx_start_page = start_page;
+ ei_status.stop_page = stop_page;
+ ei_status.word16 = (wordlength == 2);
+
+ ei_status.rx_start_page = start_page + TX_PAGES;
+#ifdef PACKETBUF_MEMSIZE
+ /* Allow the packet buffer size to be overridden by know-it-alls. */
+ ei_status.stop_page = ei_status.tx_start_page + PACKETBUF_MEMSIZE;
+#endif
+
+ ei_status.reset_8390 = &ne_reset_8390;
+ ei_status.block_input = &ne_block_input;
+ ei_status.block_output = &ne_block_output;
+ ei_status.get_8390_hdr = &ne_get_8390_hdr;
+ ei_status.priv = 0;
+ dev->open = &ne_open;
+ dev->stop = &ne_close;
+ NS8390_init(dev, 0);
+ return 0;
+
+err_out_kfree:
+ kfree(dev->priv);
+ dev->priv = NULL;
+err_out:
+ release_region(ioaddr, NE_IO_EXTENT);
+ return ret;
+}
+
+static int ne_open(struct net_device *dev)
+{
+ ei_open(dev);
+ return 0;
+}
+
+static int ne_close(struct net_device *dev)
+{
+ if (ei_debug > 1)
+ printk(KERN_DEBUG "%s: Shutting down ethercard.\n", dev->name);
+ ei_close(dev);
+ return 0;
+}
+
+/* Hard reset the card. This used to pause for the same period that a
+ 8390 reset command required, but that shouldn't be necessary. */
+
+static void ne_reset_8390(struct net_device *dev)
+{
+ unsigned long reset_start_time = jiffies;
+
+ if (ei_debug > 1)
+ printk(KERN_DEBUG "resetting the 8390 t=%ld...", jiffies);
+
+ /* DON'T change these to inb_p/outb_p or reset will fail on clones. */
+ outb(inb(NE_BASE + NE_RESET), NE_BASE + NE_RESET);
+
+ ei_status.txing = 0;
+ ei_status.dmaing = 0;
+
+ /* This check _should_not_ be necessary, omit eventually. */
+ while ((inb_p(NE_BASE+EN0_ISR) & ENISR_RESET) == 0)
+ if (jiffies - reset_start_time > 2*HZ/100) {
+ printk(KERN_WARNING "%s: ne_reset_8390() did not complete.\n", dev->name);
+ break;
+ }
+ outb_p(ENISR_RESET, NE_BASE + EN0_ISR); /* Ack intr. */
+}
+
+/* Grab the 8390 specific header. Similar to the block_input routine, but
+ we don't need to be concerned with ring wrap as the header will be at
+ the start of a page, so we optimize accordingly. */
+
+static void ne_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page)
+{
+ int nic_base = dev->base_addr;
+
+ /* This *shouldn't* happen. If it does, it's the last thing you'll see */
+
+ if (ei_status.dmaing)
+ {
+ printk(KERN_EMERG "%s: DMAing conflict in ne_get_8390_hdr "
+ "[DMAstat:%d][irqlock:%d].\n",
+ dev->name, ei_status.dmaing, ei_status.irqlock);
+ return;
+ }
+
+ ei_status.dmaing |= 0x01;
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base+ NE_CMD);
+ outb_p(sizeof(struct e8390_pkt_hdr), nic_base + EN0_RCNTLO);
+ outb_p(0, nic_base + EN0_RCNTHI);
+ outb_p(0, nic_base + EN0_RSARLO); /* On page boundary */
+ outb_p(ring_page, nic_base + EN0_RSARHI);
+ outb_p(E8390_RREAD+E8390_START, nic_base + NE_CMD);
+
+ if (ei_status.word16)
+ insw(NE_BASE + NE_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr)>>1);
+ else
+ insb(NE_BASE + NE_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr));
+
+ outb_p(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */
+ ei_status.dmaing &= ~0x01;
+
+ le16_to_cpus(&hdr->count);
+}
+
+/* Block input and output, similar to the Crynwr packet driver. If you
+ are porting to a new ethercard, look at the packet driver source for hints.
+ The NEx000 doesn't share the on-board packet memory -- you have to put
+ the packet out through the "remote DMA" dataport using outb. */
+
+static void ne_block_input(struct net_device *dev, int count, struct sk_buff *skb, int ring_offset)
+{
+#ifdef NE_SANITY_CHECK
+ int xfer_count = count;
+#endif
+ int nic_base = dev->base_addr;
+ char *buf = skb->data;
+
+ /* This *shouldn't* happen. If it does, it's the last thing you'll see */
+ if (ei_status.dmaing)
+ {
+ printk(KERN_EMERG "%s: DMAing conflict in ne_block_input "
+ "[DMAstat:%d][irqlock:%d].\n",
+ dev->name, ei_status.dmaing, ei_status.irqlock);
+ return;
+ }
+ ei_status.dmaing |= 0x01;
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base+ NE_CMD);
+ outb_p(count & 0xff, nic_base + EN0_RCNTLO);
+ outb_p(count >> 8, nic_base + EN0_RCNTHI);
+ outb_p(ring_offset & 0xff, nic_base + EN0_RSARLO);
+ outb_p(ring_offset >> 8, nic_base + EN0_RSARHI);
+ outb_p(E8390_RREAD+E8390_START, nic_base + NE_CMD);
+ if (ei_status.word16)
+ {
+ insw(NE_BASE + NE_DATAPORT,buf,count>>1);
+ if (count & 0x01)
+ {
+ buf[count-1] = inb(NE_BASE + NE_DATAPORT);
+#ifdef NE_SANITY_CHECK
+ xfer_count++;
+#endif
+ }
+ } else {
+ insb(NE_BASE + NE_DATAPORT, buf, count);
+ }
+
+#ifdef NE_SANITY_CHECK
+ /* This was for the ALPHA version only, but enough people have
+ been encountering problems so it is still here. If you see
+ this message you either 1) have a slightly incompatible clone
+ or 2) have noise/speed problems with your bus. */
+
+ if (ei_debug > 1)
+ {
+ /* DMA termination address check... */
+ int addr, tries = 20;
+ do {
+ /* DON'T check for 'inb_p(EN0_ISR) & ENISR_RDC' here
+ -- it's broken for Rx on some cards! */
+ int high = inb_p(nic_base + EN0_RSARHI);
+ int low = inb_p(nic_base + EN0_RSARLO);
+ addr = (high << 8) + low;
+ if (((ring_offset + xfer_count) & 0xff) == low)
+ break;
+ } while (--tries > 0);
+ if (tries <= 0)
+ printk(KERN_WARNING "%s: RX transfer address mismatch,"
+ "%#4.4x (expected) vs. %#4.4x (actual).\n",
+ dev->name, ring_offset + xfer_count, addr);
+ }
+#endif
+ outb_p(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */
+ ei_status.dmaing &= ~0x01;
+}
+
+static void ne_block_output(struct net_device *dev, int count,
+ const unsigned char *buf, const int start_page)
+{
+ int nic_base = NE_BASE;
+ unsigned long dma_start;
+#ifdef NE_SANITY_CHECK
+ int retries = 0;
+#endif
+
+ /* Round the count up for word writes. Do we need to do this?
+ What effect will an odd byte count have on the 8390?
+ I should check someday. */
+
+ if (ei_status.word16 && (count & 0x01))
+ count++;
+
+ /* This *shouldn't* happen. If it does, it's the last thing you'll see */
+ if (ei_status.dmaing)
+ {
+ printk(KERN_EMERG "%s: DMAing conflict in ne_block_output."
+ "[DMAstat:%d][irqlock:%d]\n",
+ dev->name, ei_status.dmaing, ei_status.irqlock);
+ return;
+ }
+ ei_status.dmaing |= 0x01;
+ /* We should already be in page 0, but to be safe... */
+ outb_p(E8390_PAGE0+E8390_START+E8390_NODMA, nic_base + NE_CMD);
+
+#ifdef NE_SANITY_CHECK
+retry:
+#endif
+
+#ifdef NE8390_RW_BUGFIX
+ /* Handle the read-before-write bug the same way as the
+ Crynwr packet driver -- the NatSemi method doesn't work.
+ Actually this doesn't always work either, but if you have
+ problems with your NEx000 this is better than nothing! */
+
+ outb_p(0x42, nic_base + EN0_RCNTLO);
+ outb_p(0x00, nic_base + EN0_RCNTHI);
+ outb_p(0x42, nic_base + EN0_RSARLO);
+ outb_p(0x00, nic_base + EN0_RSARHI);
+ outb_p(E8390_RREAD+E8390_START, nic_base + NE_CMD);
+ /* Make certain that the dummy read has occurred. */
+ udelay(6);
+#endif
+
+ outb_p(ENISR_RDC, nic_base + EN0_ISR);
+
+ /* Now the normal output. */
+ outb_p(count & 0xff, nic_base + EN0_RCNTLO);
+ outb_p(count >> 8, nic_base + EN0_RCNTHI);
+ outb_p(0x00, nic_base + EN0_RSARLO);
+ outb_p(start_page, nic_base + EN0_RSARHI);
+
+ outb_p(E8390_RWRITE+E8390_START, nic_base + NE_CMD);
+ if (ei_status.word16) {
+ outsw(NE_BASE + NE_DATAPORT, buf, count>>1);
+ } else {
+ outsb(NE_BASE + NE_DATAPORT, buf, count);
+ }
+
+ dma_start = jiffies;
+
+#ifdef NE_SANITY_CHECK
+ /* This was for the ALPHA version only, but enough people have
+ been encountering problems so it is still here. */
+
+ if (ei_debug > 1)
+ {
+ /* DMA termination address check... */
+ int addr, tries = 20;
+ do {
+ int high = inb_p(nic_base + EN0_RSARHI);
+ int low = inb_p(nic_base + EN0_RSARLO);
+ addr = (high << 8) + low;
+ if ((start_page << 8) + count == addr)
+ break;
+ } while (--tries > 0);
+
+ if (tries <= 0)
+ {
+ printk(KERN_WARNING "%s: Tx packet transfer address mismatch,"
+ "%#4.4x (expected) vs. %#4.4x (actual).\n",
+ dev->name, (start_page << 8) + count, addr);
+ if (retries++ == 0)
+ goto retry;
+ }
+ }
+#endif
+
+ while ((inb_p(nic_base + EN0_ISR) & ENISR_RDC) == 0)
+ if (jiffies - dma_start > 2*HZ/100) { /* 20ms */
+ printk(KERN_WARNING "%s: timeout waiting for Tx RDC.\n", dev->name);
+ ne_reset_8390(dev);
+ NS8390_init(dev,1);
+ break;
+ }
+
+ outb_p(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */
+ ei_status.dmaing &= ~0x01;
+ return;
+}
+
+static struct net_device dev_ne;
+
+static int __init init_module(void)
+{
+ struct net_device *dev = &dev_ne;
+ extern unsigned int opt_ne_base;
+
+ if ( opt_ne_base == 0 ) return 0;
+
+ dev->irq = 0;
+ dev->mem_end = 0;
+ dev->base_addr = opt_ne_base;
+ dev->init = ne_probe;
+
+ if ( register_netdev(dev) != 0 )
+ {
+ printk(KERN_WARNING "ne.c: No card found at io %#x\n", opt_ne_base);
+ }
+
+ return 0;
+}
+
+static void __exit cleanup_module(void)
+{
+ struct net_device *dev = &dev_ne;
+ if ( dev->priv != NULL )
+ {
+ void *priv = dev->priv;
+ free_irq(dev->irq, dev);
+ release_region(dev->base_addr, NE_IO_EXTENT);
+ unregister_netdev(dev);
+ kfree(priv);
+ }
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xen/drivers/net/net_init.c b/xen/drivers/net/net_init.c
new file mode 100644
index 0000000000..3081ec22b3
--- /dev/null
+++ b/xen/drivers/net/net_init.c
@@ -0,0 +1,732 @@
+/* net_init.c: Initialization for network devices. */
+/*
+ Written 1993,1994,1995 by Donald Becker.
+
+ The author may be reached as becker@scyld.com, or C/O
+ Scyld Computing Corporation
+ 410 Severn Ave., Suite 210
+ Annapolis MD 21403
+
+ This file contains the initialization for the "pl14+" style ethernet
+ drivers. It should eventually replace most of drivers/net/Space.c.
+ Its primary advantage is that it's able to allocate low-memory buffers.
+ A secondary advantage is that the dangerous NE*000 netcards can reserve
+ their I/O port region before the SCSI probes start.
+
+ Modifications/additions by Bjorn Ekwall <bj0rn@blox.se>:
+ ethdev_index[MAX_ETH_CARDS]
+ register_netdev() / unregister_netdev()
+
+ Modifications by Wolfgang Walter
+ Use dev_close cleanly so we always shut things down tidily.
+
+ Changed 29/10/95, Alan Cox to pass sockaddr's around for mac addresses.
+
+ 14/06/96 - Paul Gortmaker: Add generic eth_change_mtu() function.
+ 24/09/96 - Paul Norton: Add token-ring variants of the netdev functions.
+
+ 08/11/99 - Alan Cox: Got fed up of the mess in this file and cleaned it
+ up. We now share common code and have regularised name
+ allocation setups. Abolished the 16 card limits.
+ 03/19/2000 - jgarzik and Urban Widmark: init_etherdev 32-byte align
+ 03/21/2001 - jgarzik: alloc_etherdev and friends
+
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+//#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/if_ether.h>
+#include <linux/lib.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+//#include <linux/fddidevice.h>
+//#include <linux/hippidevice.h>
+//#include <linux/trdevice.h>
+//#include <linux/fcdevice.h>
+//#include <linux/if_arp.h>
+//#include <linux/if_ltalk.h>
+//#include <linux/rtnetlink.h>
+//#include <net/neighbour.h>
+
+#define rtnl_lock() ((void)0)
+#define rtnl_unlock() ((void)0)
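+/* The rtnl lock is stubbed to a no-op above, presumably because this
+ * Xen port carries no rtnetlink core (note the rtnetlink.h include is
+ * commented out); the Linux code below can then be kept unchanged. */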
+
+/* The network devices currently exist only in the socket namespace, so these
+ entries are unused. The only ones that make sense are
+ open start the ethercard
+ close stop the ethercard
+ ioctl To get statistics, perhaps set the interface port (AUI, BNC, etc.)
+ One can also imagine getting raw packets using
+ read & write
+ but this is probably better handled by a raw packet socket.
+
+ Given that almost all of these functions are handled in the current
+ socket-based scheme, putting ethercard devices in /dev/ seems pointless.
+
+ [Removed all support for /dev network devices. When someone adds
+ streams then by magic we get them, but otherwise they are un-needed
+ and a space waste]
+*/
+
+
+static struct net_device *alloc_netdev(int sizeof_priv, const char *mask,
+ void (*setup)(struct net_device *))
+{
+ struct net_device *dev;
+ int alloc_size;
+
+ /* ensure 32-byte alignment of the private area */
+ alloc_size = sizeof (*dev) + sizeof_priv + 31;
+
+ dev = (struct net_device *) kmalloc (alloc_size, GFP_KERNEL);
+ if (dev == NULL)
+ {
+ printk(KERN_ERR "alloc_dev: Unable to allocate device memory.\n");
+ return NULL;
+ }
+
+ memset(dev, 0, alloc_size);
+
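+	/* Round the first address past the net_device structure up to a
+	 * 32-byte boundary: (p + 31) & ~31 is the smallest multiple of 32
+	 * that is >= p.  E.g. p == 0x1004 gives (0x1004 + 31) & ~31 == 0x1020. */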
+ if (sizeof_priv)
+ dev->priv = (void *) (((long)(dev + 1) + 31) & ~31);
+
+ setup(dev);
+ strcpy(dev->name, mask);
+
+ return dev;
+}
+
+static struct net_device *init_alloc_dev(int sizeof_priv)
+{
+ struct net_device *dev;
+ int alloc_size;
+
+ /* ensure 32-byte alignment of the private area */
+ alloc_size = sizeof (*dev) + sizeof_priv + 31;
+
+ dev = (struct net_device *) kmalloc (alloc_size, GFP_KERNEL);
+ if (dev == NULL)
+ {
+ printk(KERN_ERR "alloc_dev: Unable to allocate device memory.\n");
+ return NULL;
+ }
+
+ memset(dev, 0, alloc_size);
+
+ if (sizeof_priv)
+ dev->priv = (void *) (((long)(dev + 1) + 31) & ~31);
+
+ return dev;
+}
+
+/*
+ * Create and name a device from a prototype, then perform any needed
+ * setup.
+ */
+
+static struct net_device *init_netdev(struct net_device *dev, int sizeof_priv,
+ char *mask, void (*setup)(struct net_device *))
+{
+ int new_device = 0;
+
+ /*
+ * Allocate a device if one is not provided.
+ */
+
+ if (dev == NULL) {
+ dev=init_alloc_dev(sizeof_priv);
+ if(dev==NULL)
+ return NULL;
+ new_device = 1;
+ }
+
+ /*
+ * Allocate a name
+ */
+
+ if (dev->name[0] == '\0' || dev->name[0] == ' ') {
+ strcpy(dev->name, mask);
+ if (dev_alloc_name(dev, mask)<0) {
+ if (new_device)
+ kfree(dev);
+ return NULL;
+ }
+ }
+
+ //netdev_boot_setup_check(dev);
+
+ /*
+ * Configure via the caller provided setup function then
+ * register if needed.
+ */
+
+ setup(dev);
+
+ if (new_device) {
+ int err;
+
+ rtnl_lock();
+ err = register_netdevice(dev);
+ rtnl_unlock();
+
+ if (err < 0) {
+ kfree(dev);
+ dev = NULL;
+ }
+ }
+ return dev;
+}
+
+#if defined(CONFIG_HIPPI) || defined(CONFIG_TR) || defined(CONFIG_NET_FC)
+static int __register_netdev(struct net_device *dev)
+{
+ if (dev->init && dev->init(dev) != 0) {
+ unregister_netdev(dev);
+ return -EIO;
+ }
+ return 0;
+}
+#endif
+
+/**
+ * init_etherdev - Register ethernet device
+ * @dev: An ethernet device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with ethernet-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_etherdev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "eth%d", ether_setup);
+}
+
+/**
+ * alloc_etherdev - Allocates and sets up an ethernet device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with ethernet-generic
+ * values. Basically does everything except registering the device.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_etherdev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "eth%d", ether_setup);
+}
+
+EXPORT_SYMBOL(init_etherdev);
+EXPORT_SYMBOL(alloc_etherdev);
+
+static int eth_mac_addr(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr=p;
+ if (netif_running(dev))
+ return -EBUSY;
+ memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);
+ return 0;
+}
+
+static int eth_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if ((new_mtu < 68) || (new_mtu > 1500))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+#ifdef CONFIG_FDDI
+
+/**
+ * init_fddidev - Register FDDI device
+ * @dev: A FDDI device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with FDDI-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_fddidev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "fddi%d", fddi_setup);
+}
+
+/**
+ * alloc_fddidev - Register FDDI device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this FDDI device
+ *
+ * Fill in the fields of the device structure with FDDI-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_fddidev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup);
+}
+
+EXPORT_SYMBOL(init_fddidev);
+EXPORT_SYMBOL(alloc_fddidev);
+
+static int fddi_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
+ return(-EINVAL);
+ dev->mtu = new_mtu;
+ return(0);
+}
+
+#endif /* CONFIG_FDDI */
+
+#ifdef CONFIG_HIPPI
+
+static int hippi_change_mtu(struct net_device *dev, int new_mtu)
+{
+ /*
+ * HIPPI's got these nice large MTUs.
+ */
+ if ((new_mtu < 68) || (new_mtu > 65280))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return(0);
+}
+
+
+/*
+ * For HIPPI we will actually use the lower 4 bytes of the hardware
+ * address as the I-FIELD rather than the actual hardware address.
+ */
+static int hippi_mac_addr(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr = p;
+ if (netif_running(dev))
+ return -EBUSY;
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ return 0;
+}
+
+
+/**
+ * init_hippi_dev - Register HIPPI device
+ * @dev: A HIPPI device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with HIPPI-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_hippi_dev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "hip%d", hippi_setup);
+}
+
+/**
+ * alloc_hippi_dev - Register HIPPI device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this HIPPI device
+ *
+ * Fill in the fields of the device structure with HIPPI-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_hippi_dev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "hip%d", hippi_setup);
+}
+
+int register_hipdev(struct net_device *dev)
+{
+ return __register_netdev(dev);
+}
+
+void unregister_hipdev(struct net_device *dev)
+{
+ unregister_netdev(dev);
+}
+
+EXPORT_SYMBOL(init_hippi_dev);
+EXPORT_SYMBOL(alloc_hippi_dev);
+EXPORT_SYMBOL(register_hipdev);
+EXPORT_SYMBOL(unregister_hipdev);
+
+static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
+{
+ /* Never send broadcast/multicast ARP messages */
+ p->mcast_probes = 0;
+
+ /* In IPv6 unicast probes are valid even on NBMA,
+ * because they are encapsulated in normal IPv6 protocol.
+ * Should be a generic flag.
+ */
+ if (p->tbl->family != AF_INET6)
+ p->ucast_probes = 0;
+ return 0;
+}
+
+#endif /* CONFIG_HIPPI */
+
+void ether_setup(struct net_device *dev)
+{
+ /* Fill in the fields of the device structure with ethernet-generic values.
+ This should be in a common file instead of per-driver. */
+
+ dev->change_mtu = eth_change_mtu;
+ dev->hard_header = eth_header;
+ dev->rebuild_header = eth_rebuild_header;
+ dev->set_mac_address = eth_mac_addr;
+ dev->hard_header_cache = eth_header_cache;
+ dev->header_cache_update= eth_header_cache_update;
+ dev->hard_header_parse = eth_header_parse;
+
+ dev->type = 0; //ARPHRD_ETHER;
+ dev->hard_header_len = ETH_HLEN;
+ dev->mtu = 1500; /* eth_mtu */
+ dev->addr_len = ETH_ALEN;
+
+ memset(dev->broadcast,0xFF, ETH_ALEN);
+
+ /* New-style flags. */
+ dev->flags = IFF_BROADCAST|IFF_MULTICAST;
+}
+EXPORT_SYMBOL(ether_setup);
+
+#ifdef CONFIG_FDDI
+
+void fddi_setup(struct net_device *dev)
+{
+ /*
+ * Fill in the fields of the device structure with FDDI-generic values.
+ * This should be in a common file instead of per-driver.
+ */
+
+ dev->change_mtu = fddi_change_mtu;
+ dev->hard_header = fddi_header;
+ dev->rebuild_header = fddi_rebuild_header;
+
+ dev->type = ARPHRD_FDDI;
+ dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */
+ dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */
+ dev->addr_len = FDDI_K_ALEN;
+
+ memset(dev->broadcast, 0xFF, FDDI_K_ALEN);
+
+ /* New-style flags */
+ dev->flags = IFF_BROADCAST | IFF_MULTICAST;
+}
+EXPORT_SYMBOL(fddi_setup);
+
+#endif /* CONFIG_FDDI */
+
+#ifdef CONFIG_HIPPI
+void hippi_setup(struct net_device *dev)
+{
+ dev->set_multicast_list = NULL;
+ dev->change_mtu = hippi_change_mtu;
+ dev->hard_header = hippi_header;
+ dev->rebuild_header = hippi_rebuild_header;
+ dev->set_mac_address = hippi_mac_addr;
+ dev->hard_header_parse = NULL;
+ dev->hard_header_cache = NULL;
+ dev->header_cache_update = NULL;
+ dev->neigh_setup = hippi_neigh_setup_dev;
+
+ /*
+ * We don't support HIPPI `ARP' for the time being, and probably
+ * never will unless someone else implements it. However we
+ * still need a fake ARPHRD to make ifconfig and friends play ball.
+ */
+ dev->type = ARPHRD_HIPPI;
+ dev->hard_header_len = HIPPI_HLEN;
+ dev->mtu = 65280;
+ dev->addr_len = HIPPI_ALEN;
+
+ memset(dev->broadcast, 0xFF, HIPPI_ALEN);
+
+ /*
+ * HIPPI doesn't support broadcast+multicast and we only use
+ * static ARP tables. ARP is disabled by hippi_neigh_setup_dev.
+ */
+ dev->flags = 0;
+}
+EXPORT_SYMBOL(hippi_setup);
+#endif /* CONFIG_HIPPI */
+
+#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
+
+static int ltalk_change_mtu(struct net_device *dev, int mtu)
+{
+ return -EINVAL;
+}
+
+static int ltalk_mac_addr(struct net_device *dev, void *addr)
+{
+ return -EINVAL;
+}
+
+
+void ltalk_setup(struct net_device *dev)
+{
+ /* Fill in the fields of the device structure with localtalk-generic values. */
+
+ dev->change_mtu = ltalk_change_mtu;
+ dev->hard_header = NULL;
+ dev->rebuild_header = NULL;
+ dev->set_mac_address = ltalk_mac_addr;
+ dev->hard_header_cache = NULL;
+ dev->header_cache_update= NULL;
+
+ dev->type = ARPHRD_LOCALTLK;
+ dev->hard_header_len = LTALK_HLEN;
+ dev->mtu = LTALK_MTU;
+ dev->addr_len = LTALK_ALEN;
+
+ dev->broadcast[0] = 0xFF;
+
+ dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP;
+}
+EXPORT_SYMBOL(ltalk_setup);
+
+#endif /* CONFIG_ATALK || CONFIG_ATALK_MODULE */
+
+int register_netdev(struct net_device *dev)
+{
+ int err;
+
+ rtnl_lock();
+
+ /*
+ * If the name is a format string the caller wants us to
+ * do a name allocation
+ */
+
+ if (strchr(dev->name, '%'))
+ {
+ err = dev_alloc_name(dev, dev->name);
+ if (err < 0)
+ goto out;
+ }
+
+ /*
+ * Back compatibility hook. Kill this one in 2.5
+ */
+
+ if (dev->name[0]==0 || dev->name[0]==' ')
+ {
+ err = dev_alloc_name(dev, "eth%d");
+ if (err < 0)
+ goto out;
+ }
+
+ err = register_netdevice(dev);
+
+out:
+ rtnl_unlock();
+ return err;
+}
+
+void unregister_netdev(struct net_device *dev)
+{
+ rtnl_lock();
+ unregister_netdevice(dev);
+ rtnl_unlock();
+}
+
+EXPORT_SYMBOL(register_netdev);
+EXPORT_SYMBOL(unregister_netdev);
+
+#ifdef CONFIG_TR
+
+void tr_setup(struct net_device *dev)
+{
+ /*
+ * Configure and register
+ */
+
+ dev->hard_header = tr_header;
+ dev->rebuild_header = tr_rebuild_header;
+
+ dev->type = ARPHRD_IEEE802_TR;
+ dev->hard_header_len = TR_HLEN;
+ dev->mtu = 2000;
+ dev->addr_len = TR_ALEN;
+
+ memset(dev->broadcast,0xFF, TR_ALEN);
+
+ /* New-style flags. */
+ dev->flags = IFF_BROADCAST | IFF_MULTICAST ;
+}
+
+/**
+ * init_trdev - Register token ring device
+ * @dev: A token ring device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with token ring-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_trdev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "tr%d", tr_setup);
+}
+
+/**
+ * alloc_trdev - Register token ring device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this token ring device
+ *
+ * Fill in the fields of the device structure with token ring-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_trdev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
+}
+
+int register_trdev(struct net_device *dev)
+{
+ return __register_netdev(dev);
+}
+
+void unregister_trdev(struct net_device *dev)
+{
+ unregister_netdev(dev);
+}
+
+EXPORT_SYMBOL(tr_setup);
+EXPORT_SYMBOL(init_trdev);
+EXPORT_SYMBOL(alloc_trdev);
+EXPORT_SYMBOL(register_trdev);
+EXPORT_SYMBOL(unregister_trdev);
+
+#endif /* CONFIG_TR */
+
+
+#ifdef CONFIG_NET_FC
+
+void fc_setup(struct net_device *dev)
+{
+ dev->hard_header = fc_header;
+ dev->rebuild_header = fc_rebuild_header;
+
+ dev->type = ARPHRD_IEEE802;
+ dev->hard_header_len = FC_HLEN;
+ dev->mtu = 2024;
+ dev->addr_len = FC_ALEN;
+
+ memset(dev->broadcast,0xFF, FC_ALEN);
+
+ /* New-style flags. */
+ dev->flags = IFF_BROADCAST;
+}
+
+/**
+ * init_fcdev - Register fibre channel device
+ * @dev: A fibre channel device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with fibre channel-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_fcdev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "fc%d", fc_setup);
+}
+
+/**
+ * alloc_fcdev - Register fibre channel device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this fibre channel device
+ *
+ * Fill in the fields of the device structure with fibre channel-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_fcdev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "fc%d", fc_setup);
+}
+
+int register_fcdev(struct net_device *dev)
+{
+ return __register_netdev(dev);
+}
+
+void unregister_fcdev(struct net_device *dev)
+{
+ unregister_netdev(dev);
+}
+
+EXPORT_SYMBOL(fc_setup);
+EXPORT_SYMBOL(init_fcdev);
+EXPORT_SYMBOL(alloc_fcdev);
+EXPORT_SYMBOL(register_fcdev);
+EXPORT_SYMBOL(unregister_fcdev);
+
+#endif /* CONFIG_NET_FC */
+
diff --git a/xen/drivers/net/setup.c b/xen/drivers/net/setup.c
new file mode 100644
index 0000000000..1352a1cb4b
--- /dev/null
+++ b/xen/drivers/net/setup.c
@@ -0,0 +1,173 @@
+
+/*
+ * New style setup code for the network devices
+ */
+
+#include <linux/config.h>
+#include <linux/netdevice.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+//#include <linux/netlink.h>
+
+extern int slip_init_ctrl_dev(void);
+extern int x25_asy_init_ctrl_dev(void);
+
+extern int dmascc_init(void);
+
+extern int awc4500_pci_probe(void);
+extern int awc4500_isa_probe(void);
+extern int awc4500_pnp_probe(void);
+extern int awc4500_365_probe(void);
+extern int arcnet_init(void);
+extern int scc_enet_init(void);
+extern int fec_enet_init(void);
+extern int dlci_setup(void);
+extern int sdla_setup(void);
+extern int sdla_c_setup(void);
+extern int comx_init(void);
+extern int lmc_setup(void);
+
+extern int madgemc_probe(void);
+extern int uml_net_probe(void);
+
+/* Pad device name to IFNAMSIZ=16. E.g. __PAD6 is a string of 9 zeros. */
+#define __PAD6 "\0\0\0\0\0\0\0\0\0"
+#define __PAD5 __PAD6 "\0"
+#define __PAD4 __PAD5 "\0"
+#define __PAD3 __PAD4 "\0"
+#define __PAD2 __PAD3 "\0"
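+/* Each __PADn suits an n-character name: n characters, the (15 - n)
+ * explicit zeros from the macro, plus the literal's implicit NUL make
+ * exactly 16 bytes.  E.g. "cm0" __PAD3 is 3 + 12 + 1 = 16. */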
+
+
+/*
+ * Devices in this list must do new-style probing.  That is, they must
+ * allocate their own device objects and do their own bus scans.
+ */
+
+struct net_probe
+{
+ int (*probe)(void);
+ int status; /* non-zero if autoprobe has failed */
+};
+
+static struct net_probe pci_probes[] __initdata = {
+ /*
+ * Early setup devices
+ */
+
+#if defined(CONFIG_DMASCC)
+ {dmascc_init, 0},
+#endif
+#if defined(CONFIG_DLCI)
+ {dlci_setup, 0},
+#endif
+#if defined(CONFIG_SDLA)
+ {sdla_c_setup, 0},
+#endif
+#if defined(CONFIG_ARCNET)
+ {arcnet_init, 0},
+#endif
+#if defined(CONFIG_SCC_ENET)
+ {scc_enet_init, 0},
+#endif
+#if defined(CONFIG_FEC_ENET)
+ {fec_enet_init, 0},
+#endif
+#if defined(CONFIG_COMX)
+ {comx_init, 0},
+#endif
+
+#if defined(CONFIG_LANMEDIA)
+ {lmc_setup, 0},
+#endif
+
+/*
+ * Wireless non-HAM
+ */
+#ifdef CONFIG_AIRONET4500_NONCS
+
+#ifdef CONFIG_AIRONET4500_PCI
+ {awc4500_pci_probe,0},
+#endif
+
+#ifdef CONFIG_AIRONET4500_PNP
+ {awc4500_pnp_probe,0},
+#endif
+
+#endif
+
+/*
+ * Token Ring Drivers
+ */
+#ifdef CONFIG_MADGEMC
+ {madgemc_probe, 0},
+#endif
+#ifdef CONFIG_UML_NET
+ {uml_net_probe, 0},
+#endif
+
+ {NULL, 0},
+};
+
+
+/*
+ * Run the updated device probes. These do not need a device passed
+ * into them.
+ */
+
+static void __init network_probe(void)
+{
+ struct net_probe *p = pci_probes;
+
+ while (p->probe != NULL)
+ {
+ p->status = p->probe();
+ p++;
+ }
+}
+
+
+/*
+ * Initialise the line discipline drivers
+ */
+
+static void __init network_ldisc_init(void)
+{
+#if defined(CONFIG_SLIP)
+ slip_init_ctrl_dev();
+#endif
+#if defined(CONFIG_X25_ASY)
+ x25_asy_init_ctrl_dev();
+#endif
+}
+
+
+static void __init special_device_init(void)
+{
+#ifdef CONFIG_NET_SB1000
+ {
+ extern int sb1000_probe(struct net_device *dev);
+ static struct net_device sb1000_dev =
+ {
+ "cm0" __PAD3, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, sb1000_probe
+ };
+ register_netdev(&sb1000_dev);
+ }
+#endif
+}
+
+/*
+ * Initialise network devices
+ */
+
+void __init net_device_init(void)
+{
+ /* Devices supporting the new probing API */
+ network_probe();
+ /* Line disciplines */
+ network_ldisc_init();
+ /* Special devices */
+ special_device_init();
+ /* That kicks off the legacy init functions */
+}
diff --git a/xen/drivers/net/tg3.c b/xen/drivers/net/tg3.c
new file mode 100644
index 0000000000..41f680904b
--- /dev/null
+++ b/xen/drivers/net/tg3.c
@@ -0,0 +1,6884 @@
+/* $Id: tg3.c,v 1.43.2.80 2002/03/14 00:10:04 davem Exp $
+ * tg3.c: Broadcom Tigon3 ethernet driver.
+ *
+ * Copyright (C) 2001, 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2001, 2002 Jeff Garzik (jgarzik@pobox.com)
+ */
+
+#include <linux/config.h>
+
+#include <linux/module.h>
+
+//#include <linux/kernel.h>
+#include <linux/types.h>
+//#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+
+#ifndef PCI_DMA_BUS_IS_PHYS
+#define PCI_DMA_BUS_IS_PHYS 1
+#endif
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#define TG3_VLAN_TAG_USED 1
+#else
+#define TG3_VLAN_TAG_USED 0
+#endif
+
+#ifdef NETIF_F_TSO
+/* XXX a bug in the TSO firmware hangs the TX cpu, so TSO stays
+ * disabled (note both branches define 0) until that is fixed. */
+#define TG3_DO_TSO 0
+#else
+#define TG3_DO_TSO 0
+#endif
+
+#include "tg3.h"
+
+#define DRV_MODULE_NAME "tg3"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "1.2a"
+#define DRV_MODULE_RELDATE "Dec 9, 2002"
+
+#define TG3_DEF_MAC_MODE 0
+#define TG3_DEF_RX_MODE 0
+#define TG3_DEF_TX_MODE 0
+#define TG3_DEF_MSG_ENABLE \
+ (NETIF_MSG_DRV | \
+ NETIF_MSG_PROBE | \
+ NETIF_MSG_LINK | \
+ NETIF_MSG_TIMER | \
+ NETIF_MSG_IFDOWN | \
+ NETIF_MSG_IFUP | \
+ NETIF_MSG_RX_ERR | \
+ NETIF_MSG_TX_ERR)
+
+/* length of time before we decide the hardware is borked,
+ * and dev->tx_timeout() should be called to fix the problem
+ */
+#define TG3_TX_TIMEOUT (5 * HZ)
+
+/* hardware minimum and maximum for a single frame's data payload */
+#define TG3_MIN_MTU 60
+#define TG3_MAX_MTU 9000
+
+/* These numbers seem to be hard coded in the NIC firmware somehow.
+ * You can't change the ring sizes, but you can change where you place
+ * them in the NIC onboard memory.
+ */
+#define TG3_RX_RING_SIZE 512
+#define TG3_DEF_RX_RING_PENDING 200
+#define TG3_RX_JUMBO_RING_SIZE 256
+#define TG3_DEF_RX_JUMBO_RING_PENDING 100
+#define TG3_RX_RCB_RING_SIZE 1024
+#define TG3_TX_RING_SIZE 512
+#define TG3_DEF_TX_RING_PENDING (TG3_TX_RING_SIZE - 1)
+
+#define TG3_RX_RING_BYTES (sizeof(struct tg3_rx_buffer_desc) * \
+ TG3_RX_RING_SIZE)
+#define TG3_RX_JUMBO_RING_BYTES (sizeof(struct tg3_rx_buffer_desc) * \
+ TG3_RX_JUMBO_RING_SIZE)
+#define TG3_RX_RCB_RING_BYTES (sizeof(struct tg3_rx_buffer_desc) * \
+ TG3_RX_RCB_RING_SIZE)
+#define TG3_TX_RING_BYTES (sizeof(struct tg3_tx_buffer_desc) * \
+ TG3_TX_RING_SIZE)
+#define TX_RING_GAP(TP) \
+ (TG3_TX_RING_SIZE - (TP)->tx_pending)
+#define TX_BUFFS_AVAIL(TP) \
+ (((TP)->tx_cons <= (TP)->tx_prod) ? \
+ (TP)->tx_cons + (TP)->tx_pending - (TP)->tx_prod : \
+ (TP)->tx_cons - (TP)->tx_prod - TX_RING_GAP(TP))
+#define NEXT_TX(N) (((N) + 1) & (TG3_TX_RING_SIZE - 1))
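+
+/* Worked example for TX_BUFFS_AVAIL with TG3_TX_RING_SIZE 512 and
+ * tx_pending 511: cons 4, prod 10 takes the first branch,
+ * 4 + 511 - 10 = 505 free slots; cons 500, prod 3 (producer wrapped)
+ * takes the second, 500 - 3 - (512 - 511) = 496. */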
+
+#define RX_PKT_BUF_SZ (1536 + tp->rx_offset + 64)
+#define RX_JUMBO_PKT_BUF_SZ (9046 + tp->rx_offset + 64)
+
+/* minimum number of free TX descriptors required to wake up TX process */
+#define TG3_TX_WAKEUP_THRESH (TG3_TX_RING_SIZE / 4)
+
+static char version[] __devinitdata =
+ DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+
+MODULE_AUTHOR("David S. Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox.com)");
+MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver");
+MODULE_LICENSE("GPL");
+MODULE_PARM(tg3_debug, "i");
+MODULE_PARM_DESC(tg3_debug, "Tigon3 bitmapped debugging message enable value");
+
+static int tg3_debug = -1; /* -1 == use TG3_DEF_MSG_ENABLE as value */
+
+static struct pci_device_id tg3_pci_tbl[] __devinitdata = {
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_SYSKONNECT, 0x4400,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, tg3_pci_tbl);
+
+static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val)
+{
+ if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->indirect_lock, flags);
+ pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off);
+ pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val);
+ spin_unlock_irqrestore(&tp->indirect_lock, flags);
+ } else {
+ writel(val, tp->regs + off);
+ }
+}
+
+#define tw32(reg,val) tg3_write_indirect_reg32(tp,(reg),(val))
+#define tw32_mailbox(reg, val) writel(((val) & 0xffffffff), tp->regs + (reg))
+#define tw16(reg,val) writew(((val) & 0xffff), tp->regs + (reg))
+#define tw8(reg,val) writeb(((val) & 0xff), tp->regs + (reg))
+#define tr32(reg) readl(tp->regs + (reg))
+#define tr16(reg) readw(tp->regs + (reg))
+#define tr8(reg) readb(tp->regs + (reg))
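+
+/* A note on the recurring "tw32(...); tr32(...); udelay(...)" pattern
+ * throughout this driver: the read-back forces the preceding posted
+ * PCI write out to the chip before the delay starts, so the chip is
+ * guaranteed to have seen the write when the settling time elapses. */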
+
+static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->indirect_lock, flags);
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off);
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val);
+
+ /* Always leave this as zero. */
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0);
+ spin_unlock_irqrestore(&tp->indirect_lock, flags);
+}
+
+static void tg3_read_mem(struct tg3 *tp, u32 off, u32 *val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->indirect_lock, flags);
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off);
+ pci_read_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val);
+
+ /* Always leave this as zero. */
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0);
+ spin_unlock_irqrestore(&tp->indirect_lock, flags);
+}
+
+static void tg3_disable_ints(struct tg3 *tp)
+{
+ tw32(TG3PCI_MISC_HOST_CTRL,
+ (tp->misc_host_ctrl | MISC_HOST_CTRL_MASK_PCI_INT));
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+}
+
+static void tg3_enable_ints(struct tg3 *tp)
+{
+ tw32(TG3PCI_MISC_HOST_CTRL,
+ (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT));
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+
+ if (tp->hw_status->status & SD_STATUS_UPDATED)
+ tw32(GRC_LOCAL_CTRL,
+ tp->grc_local_ctrl | GRC_LCLCTRL_SETINT);
+}
+
+static void tg3_switch_clocks(struct tg3 *tp)
+{
+ if (tr32(TG3PCI_CLOCK_CTRL) & CLOCK_CTRL_44MHZ_CORE) {
+ tw32(TG3PCI_CLOCK_CTRL,
+ (CLOCK_CTRL_44MHZ_CORE | CLOCK_CTRL_ALTCLK));
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+ tw32(TG3PCI_CLOCK_CTRL,
+ (CLOCK_CTRL_ALTCLK));
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+ }
+ tw32(TG3PCI_CLOCK_CTRL, 0);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+}
+
+#define PHY_BUSY_LOOPS 5000
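+
+/* MII management access: a frame is composed in MAC_MI_COM (PHY
+ * address, register number, read or write command, MI_COM_START),
+ * then MI_COM_BUSY is polled -- up to PHY_BUSY_LOOPS iterations of
+ * roughly 10us each -- until the PHY completes the transaction. */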
+
+static int tg3_readphy(struct tg3 *tp, int reg, u32 *val)
+{
+ u32 frame_val;
+ int loops, ret;
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32(MAC_MI_MODE,
+ (tp->mi_mode & ~MAC_MI_MODE_AUTO_POLL));
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ *val = 0xffffffff;
+
+ frame_val = ((PHY_ADDR << MI_COM_PHY_ADDR_SHIFT) &
+ MI_COM_PHY_ADDR_MASK);
+ frame_val |= ((reg << MI_COM_REG_ADDR_SHIFT) &
+ MI_COM_REG_ADDR_MASK);
+ frame_val |= (MI_COM_CMD_READ | MI_COM_START);
+
+ tw32(MAC_MI_COM, frame_val);
+ tr32(MAC_MI_COM);
+
+ loops = PHY_BUSY_LOOPS;
+ while (loops-- > 0) {
+ udelay(10);
+ frame_val = tr32(MAC_MI_COM);
+
+ if ((frame_val & MI_COM_BUSY) == 0) {
+ udelay(5);
+ frame_val = tr32(MAC_MI_COM);
+ break;
+ }
+ }
+
+ ret = -EBUSY;
+ if (loops > 0) {
+ *val = frame_val & MI_COM_DATA_MASK;
+ ret = 0;
+ }
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ return ret;
+}
+
+static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
+{
+ u32 frame_val;
+ int loops, ret;
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32(MAC_MI_MODE,
+ (tp->mi_mode & ~MAC_MI_MODE_AUTO_POLL));
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ frame_val = ((PHY_ADDR << MI_COM_PHY_ADDR_SHIFT) &
+ MI_COM_PHY_ADDR_MASK);
+ frame_val |= ((reg << MI_COM_REG_ADDR_SHIFT) &
+ MI_COM_REG_ADDR_MASK);
+ frame_val |= (val & MI_COM_DATA_MASK);
+ frame_val |= (MI_COM_CMD_WRITE | MI_COM_START);
+
+ tw32(MAC_MI_COM, frame_val);
+ tr32(MAC_MI_COM);
+
+ loops = PHY_BUSY_LOOPS;
+ while (loops-- > 0) {
+ udelay(10);
+ frame_val = tr32(MAC_MI_COM);
+ if ((frame_val & MI_COM_BUSY) == 0) {
+ udelay(5);
+ frame_val = tr32(MAC_MI_COM);
+ break;
+ }
+ }
+
+ ret = -EBUSY;
+ if (loops > 0)
+ ret = 0;
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ return ret;
+}
+
+/* Reset the tigon3 PHY.  If FORCE is zero and there is already a
+ * valid link, the reset is skipped.
+ */
+static int tg3_phy_reset(struct tg3 *tp, int force)
+{
+ u32 phy_status, phy_control;
+ int err, limit;
+
+ err = tg3_readphy(tp, MII_BMSR, &phy_status);
+ err |= tg3_readphy(tp, MII_BMSR, &phy_status);
+ if (err != 0)
+ return -EBUSY;
+
+ /* If we have link, and not forcing a reset, then nothing
+ * to do.
+ */
+ if ((phy_status & BMSR_LSTATUS) != 0 && (force == 0))
+ return 0;
+
+ /* OK, reset it, and poll the BMCR_RESET bit until it
+ * clears or we time out.
+ */
+ phy_control = BMCR_RESET;
+ err = tg3_writephy(tp, MII_BMCR, phy_control);
+ if (err != 0)
+ return -EBUSY;
+
+ limit = 5000;
+ while (limit--) {
+ err = tg3_readphy(tp, MII_BMCR, &phy_control);
+ if (err != 0)
+ return -EBUSY;
+
+ if ((phy_control & BMCR_RESET) == 0) {
+ udelay(40);
+ return 0;
+ }
+ udelay(10);
+ }
+
+ return -EBUSY;
+}
+
+static int tg3_setup_phy(struct tg3 *);
+static int tg3_halt(struct tg3 *);
+
+static int tg3_set_power_state(struct tg3 *tp, int state)
+{
+ u32 misc_host_ctrl;
+ u16 power_control, power_caps;
+ int pm = tp->pm_cap;
+
+ /* Make sure register accesses (indirect or otherwise)
+ * will function correctly.
+ */
+ pci_write_config_dword(tp->pdev,
+ TG3PCI_MISC_HOST_CTRL,
+ tp->misc_host_ctrl);
+
+ pci_read_config_word(tp->pdev,
+ pm + PCI_PM_CTRL,
+ &power_control);
+ power_control |= PCI_PM_CTRL_PME_STATUS;
+ power_control &= ~(PCI_PM_CTRL_STATE_MASK);
+ switch (state) {
+ case 0:
+ power_control |= 0;
+ pci_write_config_word(tp->pdev,
+ pm + PCI_PM_CTRL,
+ power_control);
+ tw32(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ return 0;
+
+ case 1:
+ power_control |= 1;
+ break;
+
+ case 2:
+ power_control |= 2;
+ break;
+
+ case 3:
+ power_control |= 3;
+ break;
+
+ default:
+ printk(KERN_WARNING PFX "%s: Invalid power state (%d) "
+ "requested.\n",
+ tp->dev->name, state);
+ return -EINVAL;
+ };
+
+ power_control |= PCI_PM_CTRL_PME_ENABLE;
+
+ misc_host_ctrl = tr32(TG3PCI_MISC_HOST_CTRL);
+ tw32(TG3PCI_MISC_HOST_CTRL,
+ misc_host_ctrl | MISC_HOST_CTRL_MASK_PCI_INT);
+
+ if (tp->link_config.phy_is_low_power == 0) {
+ tp->link_config.phy_is_low_power = 1;
+ tp->link_config.orig_speed = tp->link_config.speed;
+ tp->link_config.orig_duplex = tp->link_config.duplex;
+ tp->link_config.orig_autoneg = tp->link_config.autoneg;
+ }
+
+ if (tp->phy_id != PHY_ID_SERDES) {
+ tp->link_config.speed = SPEED_10;
+ tp->link_config.duplex = DUPLEX_HALF;
+ tp->link_config.autoneg = AUTONEG_ENABLE;
+ tg3_setup_phy(tp);
+ }
+
+ tg3_halt(tp);
+
+ pci_read_config_word(tp->pdev, pm + PCI_PM_PMC, &power_caps);
+
+ if (tp->tg3_flags & TG3_FLAG_WOL_ENABLE) {
+ u32 mac_mode;
+
+ if (tp->phy_id != PHY_ID_SERDES) {
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x5a);
+ udelay(40);
+
+ mac_mode = MAC_MODE_PORT_MODE_MII;
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 ||
+ !(tp->tg3_flags & TG3_FLAG_WOL_SPEED_100MB))
+ mac_mode |= MAC_MODE_LINK_POLARITY;
+ } else {
+ mac_mode = MAC_MODE_PORT_MODE_TBI;
+ }
+
+
+ if (((power_caps & PCI_PM_CAP_PME_D3cold) &&
+ (tp->tg3_flags & TG3_FLAG_WOL_ENABLE)))
+ mac_mode |= MAC_MODE_MAGIC_PKT_ENABLE;
+
+ tw32(MAC_MODE, mac_mode);
+ tr32(MAC_MODE);
+ udelay(100);
+
+ tw32(MAC_RX_MODE, RX_MODE_ENABLE);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_WOL_SPEED_100MB) {
+ u32 base_val;
+
+ base_val = 0;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)
+ base_val |= (CLOCK_CTRL_RXCLK_DISABLE |
+ CLOCK_CTRL_TXCLK_DISABLE);
+
+ tw32(TG3PCI_CLOCK_CTRL, base_val |
+ CLOCK_CTRL_ALTCLK);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+
+ tw32(TG3PCI_CLOCK_CTRL, base_val |
+ CLOCK_CTRL_ALTCLK |
+ CLOCK_CTRL_44MHZ_CORE);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+
+ tw32(TG3PCI_CLOCK_CTRL, base_val |
+ CLOCK_CTRL_44MHZ_CORE);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+ } else {
+ u32 base_val;
+
+ base_val = 0;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)
+ base_val |= (CLOCK_CTRL_RXCLK_DISABLE |
+ CLOCK_CTRL_TXCLK_DISABLE);
+
+ tw32(TG3PCI_CLOCK_CTRL, base_val |
+ CLOCK_CTRL_ALTCLK |
+ CLOCK_CTRL_PWRDOWN_PLL133);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+ }
+
+ if (!(tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) &&
+ (tp->tg3_flags & TG3_FLAG_WOL_ENABLE)) {
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701) {
+ tw32(GRC_LOCAL_CTRL,
+ (GRC_LCLCTRL_GPIO_OE0 |
+ GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OE2 |
+ GRC_LCLCTRL_GPIO_OUTPUT0 |
+ GRC_LCLCTRL_GPIO_OUTPUT1));
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+ } else {
+ tw32(GRC_LOCAL_CTRL,
+ (GRC_LCLCTRL_GPIO_OE0 |
+ GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OE2 |
+ GRC_LCLCTRL_GPIO_OUTPUT1 |
+ GRC_LCLCTRL_GPIO_OUTPUT2));
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ tw32(GRC_LOCAL_CTRL,
+ (GRC_LCLCTRL_GPIO_OE0 |
+ GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OE2 |
+ GRC_LCLCTRL_GPIO_OUTPUT0 |
+ GRC_LCLCTRL_GPIO_OUTPUT1 |
+ GRC_LCLCTRL_GPIO_OUTPUT2));
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ tw32(GRC_LOCAL_CTRL,
+ (GRC_LCLCTRL_GPIO_OE0 |
+ GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OE2 |
+ GRC_LCLCTRL_GPIO_OUTPUT0 |
+ GRC_LCLCTRL_GPIO_OUTPUT1));
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+ }
+ }
+
+ /* Finally, set the new power state. */
+ pci_write_config_word(tp->pdev, pm + PCI_PM_CTRL, power_control);
+
+ return 0;
+}
+
+static void tg3_link_report(struct tg3 *tp)
+{
+ if (!netif_carrier_ok(tp->dev)) {
+ printk(KERN_INFO PFX "%s: Link is down.\n", tp->dev->name);
+ } else {
+ printk(KERN_INFO PFX "%s: Link is up at %d Mbps, %s duplex.\n",
+ tp->dev->name,
+ (tp->link_config.active_speed == SPEED_1000 ?
+ 1000 :
+ (tp->link_config.active_speed == SPEED_100 ?
+ 100 : 10)),
+ (tp->link_config.active_duplex == DUPLEX_FULL ?
+ "full" : "half"));
+
+ printk(KERN_INFO PFX "%s: Flow control is %s for TX and "
+ "%s for RX.\n",
+ tp->dev->name,
+ (tp->tg3_flags & TG3_FLAG_TX_PAUSE) ? "on" : "off",
+ (tp->tg3_flags & TG3_FLAG_RX_PAUSE) ? "on" : "off");
+ }
+}
+
+static void tg3_setup_flow_control(struct tg3 *tp, u32 local_adv, u32 remote_adv)
+{
+ u32 new_tg3_flags = 0;
+
+ if (local_adv & ADVERTISE_PAUSE_CAP) {
+ if (local_adv & ADVERTISE_PAUSE_ASYM) {
+ if (remote_adv & LPA_PAUSE_CAP)
+ new_tg3_flags |=
+ (TG3_FLAG_RX_PAUSE |
+ TG3_FLAG_TX_PAUSE);
+ else if (remote_adv & LPA_PAUSE_ASYM)
+ new_tg3_flags |=
+ (TG3_FLAG_RX_PAUSE);
+ } else {
+ if (remote_adv & LPA_PAUSE_CAP)
+ new_tg3_flags |=
+ (TG3_FLAG_RX_PAUSE |
+ TG3_FLAG_TX_PAUSE);
+ }
+ } else if (local_adv & ADVERTISE_PAUSE_ASYM) {
+ if ((remote_adv & LPA_PAUSE_CAP) &&
+ (remote_adv & LPA_PAUSE_ASYM))
+ new_tg3_flags |= TG3_FLAG_TX_PAUSE;
+ }
+
+ tp->tg3_flags &= ~(TG3_FLAG_RX_PAUSE | TG3_FLAG_TX_PAUSE);
+ tp->tg3_flags |= new_tg3_flags;
+
+ if (new_tg3_flags & TG3_FLAG_RX_PAUSE)
+ tp->rx_mode |= RX_MODE_FLOW_CTRL_ENABLE;
+ else
+ tp->rx_mode &= ~RX_MODE_FLOW_CTRL_ENABLE;
+
+ if (new_tg3_flags & TG3_FLAG_TX_PAUSE)
+ tp->tx_mode |= TX_MODE_FLOW_CTRL_ENABLE;
+ else
+ tp->tx_mode &= ~TX_MODE_FLOW_CTRL_ENABLE;
+}
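+
+/* The resolution above matches the usual 802.3x pause scheme:
+ * symmetric pause on both ends enables flow control in both
+ * directions; a local asym-only advertisement yields TX-only pause
+ * when the partner advertises cap+asym; local cap+asym against a
+ * partner advertising asym-only yields RX-only pause. */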
+
+static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex)
+{
+ switch (val & MII_TG3_AUX_STAT_SPDMASK) {
+ case MII_TG3_AUX_STAT_10HALF:
+ *speed = SPEED_10;
+ *duplex = DUPLEX_HALF;
+ break;
+
+ case MII_TG3_AUX_STAT_10FULL:
+ *speed = SPEED_10;
+ *duplex = DUPLEX_FULL;
+ break;
+
+ case MII_TG3_AUX_STAT_100HALF:
+ *speed = SPEED_100;
+ *duplex = DUPLEX_HALF;
+ break;
+
+ case MII_TG3_AUX_STAT_100FULL:
+ *speed = SPEED_100;
+ *duplex = DUPLEX_FULL;
+ break;
+
+ case MII_TG3_AUX_STAT_1000HALF:
+ *speed = SPEED_1000;
+ *duplex = DUPLEX_HALF;
+ break;
+
+ case MII_TG3_AUX_STAT_1000FULL:
+ *speed = SPEED_1000;
+ *duplex = DUPLEX_FULL;
+ break;
+
+ default:
+ *speed = SPEED_INVALID;
+ *duplex = DUPLEX_INVALID;
+ break;
+ };
+}
+
+static int tg3_phy_copper_begin(struct tg3 *tp, int wait_for_link)
+{
+ u32 new_adv;
+ int i;
+
+ if (tp->link_config.phy_is_low_power) {
+ /* Entering low power mode. Disable gigabit and
+ * 100baseT advertisements.
+ */
+ tg3_writephy(tp, MII_TG3_CTRL, 0);
+
+ new_adv = (ADVERTISE_10HALF | ADVERTISE_10FULL |
+ ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP);
+ if (tp->tg3_flags & TG3_FLAG_WOL_SPEED_100MB)
+ new_adv |= (ADVERTISE_100HALF | ADVERTISE_100FULL);
+
+ tg3_writephy(tp, MII_ADVERTISE, new_adv);
+ } else if (tp->link_config.speed == SPEED_INVALID) {
+ tp->link_config.advertising =
+ (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
+ ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
+ ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full |
+ ADVERTISED_Autoneg | ADVERTISED_MII);
+
+ if (tp->tg3_flags & TG3_FLAG_10_100_ONLY)
+ tp->link_config.advertising &=
+ ~(ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full);
+
+ new_adv = (ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP);
+ if (tp->link_config.advertising & ADVERTISED_10baseT_Half)
+ new_adv |= ADVERTISE_10HALF;
+ if (tp->link_config.advertising & ADVERTISED_10baseT_Full)
+ new_adv |= ADVERTISE_10FULL;
+ if (tp->link_config.advertising & ADVERTISED_100baseT_Half)
+ new_adv |= ADVERTISE_100HALF;
+ if (tp->link_config.advertising & ADVERTISED_100baseT_Full)
+ new_adv |= ADVERTISE_100FULL;
+ tg3_writephy(tp, MII_ADVERTISE, new_adv);
+
+ if (tp->link_config.advertising &
+ (ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full)) {
+ new_adv = 0;
+ if (tp->link_config.advertising & ADVERTISED_1000baseT_Half)
+ new_adv |= MII_TG3_CTRL_ADV_1000_HALF;
+ if (tp->link_config.advertising & ADVERTISED_1000baseT_Full)
+ new_adv |= MII_TG3_CTRL_ADV_1000_FULL;
+ if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY) &&
+ (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0))
+ new_adv |= (MII_TG3_CTRL_AS_MASTER |
+ MII_TG3_CTRL_ENABLE_AS_MASTER);
+ tg3_writephy(tp, MII_TG3_CTRL, new_adv);
+ } else {
+ tg3_writephy(tp, MII_TG3_CTRL, 0);
+ }
+ } else {
+ /* Asking for a specific link mode. */
+ if (tp->link_config.speed == SPEED_1000) {
+ new_adv = ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP;
+ tg3_writephy(tp, MII_ADVERTISE, new_adv);
+
+ if (tp->link_config.duplex == DUPLEX_FULL)
+ new_adv = MII_TG3_CTRL_ADV_1000_FULL;
+ else
+ new_adv = MII_TG3_CTRL_ADV_1000_HALF;
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0)
+ new_adv |= (MII_TG3_CTRL_AS_MASTER |
+ MII_TG3_CTRL_ENABLE_AS_MASTER);
+ tg3_writephy(tp, MII_TG3_CTRL, new_adv);
+ } else {
+ tg3_writephy(tp, MII_TG3_CTRL, 0);
+
+ new_adv = ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP;
+ if (tp->link_config.speed == SPEED_100) {
+ if (tp->link_config.duplex == DUPLEX_FULL)
+ new_adv |= ADVERTISE_100FULL;
+ else
+ new_adv |= ADVERTISE_100HALF;
+ } else {
+ if (tp->link_config.duplex == DUPLEX_FULL)
+ new_adv |= ADVERTISE_10FULL;
+ else
+ new_adv |= ADVERTISE_10HALF;
+ }
+ tg3_writephy(tp, MII_ADVERTISE, new_adv);
+ }
+ }
+
+ if (tp->link_config.autoneg == AUTONEG_DISABLE &&
+ tp->link_config.speed != SPEED_INVALID) {
+ u32 bmcr, orig_bmcr;
+
+ tp->link_config.active_speed = tp->link_config.speed;
+ tp->link_config.active_duplex = tp->link_config.duplex;
+
+ bmcr = 0;
+ switch (tp->link_config.speed) {
+ default:
+ case SPEED_10:
+ break;
+
+ case SPEED_100:
+ bmcr |= BMCR_SPEED100;
+ break;
+
+ case SPEED_1000:
+ bmcr |= TG3_BMCR_SPEED1000;
+ break;
+ };
+
+ if (tp->link_config.duplex == DUPLEX_FULL)
+ bmcr |= BMCR_FULLDPLX;
+
+ tg3_readphy(tp, MII_BMCR, &orig_bmcr);
+ if (bmcr != orig_bmcr) {
+ tg3_writephy(tp, MII_BMCR, BMCR_LOOPBACK);
+ for (i = 0; i < 15000; i++) {
+ u32 tmp;
+
+ udelay(10);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ if (!(tmp & BMSR_LSTATUS)) {
+ udelay(40);
+ break;
+ }
+ }
+ tg3_writephy(tp, MII_BMCR, bmcr);
+ udelay(40);
+ }
+ } else {
+ tg3_writephy(tp, MII_BMCR,
+ BMCR_ANENABLE | BMCR_ANRESTART);
+ }
+
+ if (wait_for_link) {
+ tp->link_config.active_speed = SPEED_INVALID;
+ tp->link_config.active_duplex = DUPLEX_INVALID;
+ for (i = 0; i < 300000; i++) {
+ u32 tmp;
+
+ udelay(10);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ if (!(tmp & BMSR_LSTATUS))
+ continue;
+
+ tg3_readphy(tp, MII_TG3_AUX_STAT, &tmp);
+ tg3_aux_stat_to_speed_duplex(tp, tmp,
+ &tp->link_config.active_speed,
+ &tp->link_config.active_duplex);
+ }
+ if (tp->link_config.active_speed == SPEED_INVALID)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int tg3_init_5401phy_dsp(struct tg3 *tp)
+{
+ int err;
+
+ /* Turn off tap power management. */
+ err = tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c20);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x0012);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x1804);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x0013);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x1204);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x8006);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x0132);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x8006);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x0232);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x201f);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x0a20);
+
+ udelay(40);
+
+ return err;
+}
+
+static int tg3_setup_copper_phy(struct tg3 *tp)
+{
+ int current_link_up;
+ u32 bmsr, dummy;
+ u16 current_speed;
+ u8 current_duplex;
+ int i, err;
+
+ tw32(MAC_STATUS,
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED));
+ tr32(MAC_STATUS);
+ udelay(40);
+
+ tp->mi_mode = MAC_MI_MODE_BASE;
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x02);
+
+ if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) {
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+
+ if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE))
+ bmsr = 0;
+
+ if (!(bmsr & BMSR_LSTATUS)) {
+ err = tg3_init_5401phy_dsp(tp);
+ if (err)
+ return err;
+
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+ for (i = 0; i < 1000; i++) {
+ udelay(10);
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+ if (bmsr & BMSR_LSTATUS) {
+ udelay(40);
+ break;
+ }
+ }
+
+ if ((tp->phy_id & PHY_ID_REV_MASK) == PHY_REV_BCM5401_B0 &&
+ !(bmsr & BMSR_LSTATUS) &&
+ tp->link_config.active_speed == SPEED_1000) {
+ err = tg3_phy_reset(tp, 1);
+ if (!err)
+ err = tg3_init_5401phy_dsp(tp);
+ if (err)
+ return err;
+ }
+ }
+ } else if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0) {
+ /* 5701 {A0,B0} CRC bug workaround */
+ tg3_writephy(tp, 0x15, 0x0a75);
+ tg3_writephy(tp, 0x1c, 0x8c68);
+ tg3_writephy(tp, 0x1c, 0x8d68);
+ tg3_writephy(tp, 0x1c, 0x8c68);
+ }
+
+ /* Clear pending interrupts... */
+ tg3_readphy(tp, MII_TG3_ISTAT, &dummy);
+ tg3_readphy(tp, MII_TG3_ISTAT, &dummy);
+
+ if (tp->tg3_flags & TG3_FLAG_USE_MI_INTERRUPT)
+ tg3_writephy(tp, MII_TG3_IMASK, ~MII_TG3_INT_LINKCHG);
+ else
+ tg3_writephy(tp, MII_TG3_IMASK, ~0);
+
+ if (tp->led_mode == led_mode_three_link)
+ tg3_writephy(tp, MII_TG3_EXT_CTRL,
+ MII_TG3_EXT_CTRL_LNK3_LED_MODE);
+ else
+ tg3_writephy(tp, MII_TG3_EXT_CTRL, 0);
+
+ current_link_up = 0;
+ current_speed = SPEED_INVALID;
+ current_duplex = DUPLEX_INVALID;
+
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+
+ if (bmsr & BMSR_LSTATUS) {
+ u32 aux_stat, bmcr;
+
+ tg3_readphy(tp, MII_TG3_AUX_STAT, &aux_stat);
+ for (i = 0; i < 2000; i++) {
+ udelay(10);
+ tg3_readphy(tp, MII_TG3_AUX_STAT, &aux_stat);
+ if (aux_stat)
+ break;
+ }
+
+ tg3_aux_stat_to_speed_duplex(tp, aux_stat,
+ &current_speed,
+ &current_duplex);
+ tg3_readphy(tp, MII_BMCR, &bmcr);
+ tg3_readphy(tp, MII_BMCR, &bmcr);
+ if (tp->link_config.autoneg == AUTONEG_ENABLE) {
+ if (bmcr & BMCR_ANENABLE) {
+ u32 gig_ctrl;
+
+ current_link_up = 1;
+
+ /* Force autoneg restart if we are exiting
+ * low power mode.
+ */
+ tg3_readphy(tp, MII_TG3_CTRL, &gig_ctrl);
+ if (!(gig_ctrl & (MII_TG3_CTRL_ADV_1000_HALF |
+ MII_TG3_CTRL_ADV_1000_FULL))) {
+ current_link_up = 0;
+ }
+ } else {
+ current_link_up = 0;
+ }
+ } else {
+ if (!(bmcr & BMCR_ANENABLE) &&
+ tp->link_config.speed == current_speed &&
+ tp->link_config.duplex == current_duplex) {
+ current_link_up = 1;
+ } else {
+ current_link_up = 0;
+ }
+ }
+
+ tp->link_config.active_speed = current_speed;
+ tp->link_config.active_duplex = current_duplex;
+ }
+
+ if (current_link_up == 1 &&
+ (tp->link_config.active_duplex == DUPLEX_FULL) &&
+ (tp->link_config.autoneg == AUTONEG_ENABLE)) {
+ u32 local_adv, remote_adv;
+
+ tg3_readphy(tp, MII_ADVERTISE, &local_adv);
+ local_adv &= (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM);
+
+ tg3_readphy(tp, MII_LPA, &remote_adv);
+ remote_adv &= (LPA_PAUSE_CAP | LPA_PAUSE_ASYM);
+
+ /* If we are not advertising full pause capability,
+ * something is wrong. Bring the link down and reconfigure.
+ */
+ if (local_adv != ADVERTISE_PAUSE_CAP) {
+ current_link_up = 0;
+ } else {
+ tg3_setup_flow_control(tp, local_adv, remote_adv);
+ }
+ }
+
+ if (current_link_up == 0) {
+ u32 tmp;
+
+ tg3_phy_copper_begin(tp, 0);
+
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ if (tmp & BMSR_LSTATUS)
+ current_link_up = 1;
+ }
+
+ tp->mac_mode &= ~MAC_MODE_PORT_MODE_MASK;
+ if (current_link_up == 1) {
+ if (tp->link_config.active_speed == SPEED_100 ||
+ tp->link_config.active_speed == SPEED_10)
+ tp->mac_mode |= MAC_MODE_PORT_MODE_MII;
+ else
+ tp->mac_mode |= MAC_MODE_PORT_MODE_GMII;
+ } else
+ tp->mac_mode |= MAC_MODE_PORT_MODE_GMII;
+
+ tp->mac_mode &= ~MAC_MODE_HALF_DUPLEX;
+ if (tp->link_config.active_duplex == DUPLEX_HALF)
+ tp->mac_mode |= MAC_MODE_HALF_DUPLEX;
+
+ tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700) {
+ if ((tp->led_mode == led_mode_link10) ||
+ (current_link_up == 1 &&
+ tp->link_config.active_speed == SPEED_10))
+ tp->mac_mode |= MAC_MODE_LINK_POLARITY;
+ } else {
+ if (current_link_up == 1)
+ tp->mac_mode |= MAC_MODE_LINK_POLARITY;
+ tw32(MAC_LED_CTRL, LED_CTRL_PHY_MODE_1);
+ }
+
+ /* ??? Without this setting Netgear GA302T PHY does not
+ * ??? send/receive packets...
+ */
+ if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5411 &&
+ tp->pci_chip_rev_id == CHIPREV_ID_5700_ALTIMA) {
+ tp->mi_mode |= MAC_MI_MODE_AUTO_POLL;
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ if (tp->tg3_flags &
+ (TG3_FLAG_USE_LINKCHG_REG |
+ TG3_FLAG_POLL_SERDES)) {
+ /* Polled via timer. */
+ tw32(MAC_EVENT, 0);
+ } else {
+ tw32(MAC_EVENT, MAC_EVENT_LNKSTATE_CHANGED);
+ }
+ tr32(MAC_EVENT);
+ udelay(40);
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 &&
+ current_link_up == 1 &&
+ tp->link_config.active_speed == SPEED_1000 &&
+ ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) ||
+ (tp->tg3_flags & TG3_FLAG_PCI_HIGH_SPEED))) {
+ udelay(120);
+ tw32(MAC_STATUS,
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED));
+ tr32(MAC_STATUS);
+ udelay(40);
+ tg3_write_mem(tp,
+ NIC_SRAM_FIRMWARE_MBOX,
+ NIC_SRAM_FIRMWARE_MBOX_MAGIC2);
+ }
+
+ if (current_link_up != netif_carrier_ok(tp->dev)) {
+ if (current_link_up)
+ netif_carrier_on(tp->dev);
+ else
+ netif_carrier_off(tp->dev);
+ tg3_link_report(tp);
+ }
+
+ return 0;
+}
+
+struct tg3_fiber_aneginfo {
+ int state;
+#define ANEG_STATE_UNKNOWN 0
+#define ANEG_STATE_AN_ENABLE 1
+#define ANEG_STATE_RESTART_INIT 2
+#define ANEG_STATE_RESTART 3
+#define ANEG_STATE_DISABLE_LINK_OK 4
+#define ANEG_STATE_ABILITY_DETECT_INIT 5
+#define ANEG_STATE_ABILITY_DETECT 6
+#define ANEG_STATE_ACK_DETECT_INIT 7
+#define ANEG_STATE_ACK_DETECT 8
+#define ANEG_STATE_COMPLETE_ACK_INIT 9
+#define ANEG_STATE_COMPLETE_ACK 10
+#define ANEG_STATE_IDLE_DETECT_INIT 11
+#define ANEG_STATE_IDLE_DETECT 12
+#define ANEG_STATE_LINK_OK 13
+#define ANEG_STATE_NEXT_PAGE_WAIT_INIT 14
+#define ANEG_STATE_NEXT_PAGE_WAIT 15
+
+ u32 flags;
+#define MR_AN_ENABLE 0x00000001
+#define MR_RESTART_AN 0x00000002
+#define MR_AN_COMPLETE 0x00000004
+#define MR_PAGE_RX 0x00000008
+#define MR_NP_LOADED 0x00000010
+#define MR_TOGGLE_TX 0x00000020
+#define MR_LP_ADV_FULL_DUPLEX 0x00000040
+#define MR_LP_ADV_HALF_DUPLEX 0x00000080
+#define MR_LP_ADV_SYM_PAUSE 0x00000100
+#define MR_LP_ADV_ASYM_PAUSE 0x00000200
+#define MR_LP_ADV_REMOTE_FAULT1 0x00000400
+#define MR_LP_ADV_REMOTE_FAULT2 0x00000800
+#define MR_LP_ADV_NEXT_PAGE 0x00001000
+#define MR_TOGGLE_RX 0x00002000
+#define MR_NP_RX 0x00004000
+
+#define MR_LINK_OK 0x80000000
+
+ unsigned long link_time, cur_time;
+
+ u32 ability_match_cfg;
+ int ability_match_count;
+
+ char ability_match, idle_match, ack_match;
+
+ u32 txconfig, rxconfig;
+#define ANEG_CFG_NP 0x00000080
+#define ANEG_CFG_ACK 0x00000040
+#define ANEG_CFG_RF2 0x00000020
+#define ANEG_CFG_RF1 0x00000010
+#define ANEG_CFG_PS2 0x00000001
+#define ANEG_CFG_PS1 0x00008000
+#define ANEG_CFG_HD 0x00004000
+#define ANEG_CFG_FD 0x00002000
+#define ANEG_CFG_INVAL 0x00001f06
+
+};
+#define ANEG_OK 0
+#define ANEG_DONE 1
+#define ANEG_TIMER_ENAB 2
+#define ANEG_FAILED -1
+
+#define ANEG_STATE_SETTLE_TIME 10000
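+
+/* The state machine below is, in effect, a software rendition of
+ * IEEE 802.3z clause-37 style auto-negotiation for the fiber/TBI
+ * port: exchange config code-words (txconfig/rxconfig), wait for the
+ * partner's ability match and acknowledge, latch the link-partner
+ * flags, then settle into LINK_OK once idle is detected.  The caller
+ * drives it repeatedly, honouring ANEG_TIMER_ENAB, until it returns
+ * ANEG_DONE or ANEG_FAILED. */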
+
+static int tg3_fiber_aneg_smachine(struct tg3 *tp,
+ struct tg3_fiber_aneginfo *ap)
+{
+ unsigned long delta;
+ u32 rx_cfg_reg;
+ int ret;
+
+ if (ap->state == ANEG_STATE_UNKNOWN) {
+ ap->rxconfig = 0;
+ ap->link_time = 0;
+ ap->cur_time = 0;
+ ap->ability_match_cfg = 0;
+ ap->ability_match_count = 0;
+ ap->ability_match = 0;
+ ap->idle_match = 0;
+ ap->ack_match = 0;
+ }
+ ap->cur_time++;
+
+ if (tr32(MAC_STATUS) & MAC_STATUS_RCVD_CFG) {
+ rx_cfg_reg = tr32(MAC_RX_AUTO_NEG);
+
+ if (rx_cfg_reg != ap->ability_match_cfg) {
+ ap->ability_match_cfg = rx_cfg_reg;
+ ap->ability_match = 0;
+ ap->ability_match_count = 0;
+ } else {
+ if (++ap->ability_match_count > 1) {
+ ap->ability_match = 1;
+ ap->ability_match_cfg = rx_cfg_reg;
+ }
+ }
+ if (rx_cfg_reg & ANEG_CFG_ACK)
+ ap->ack_match = 1;
+ else
+ ap->ack_match = 0;
+
+ ap->idle_match = 0;
+ } else {
+ ap->idle_match = 1;
+ ap->ability_match_cfg = 0;
+ ap->ability_match_count = 0;
+ ap->ability_match = 0;
+ ap->ack_match = 0;
+
+ rx_cfg_reg = 0;
+ }
+
+ ap->rxconfig = rx_cfg_reg;
+ ret = ANEG_OK;
+
+ switch(ap->state) {
+ case ANEG_STATE_UNKNOWN:
+ if (ap->flags & (MR_AN_ENABLE | MR_RESTART_AN))
+ ap->state = ANEG_STATE_AN_ENABLE;
+
+ /* fallthru */
+ case ANEG_STATE_AN_ENABLE:
+ ap->flags &= ~(MR_AN_COMPLETE | MR_PAGE_RX);
+ if (ap->flags & MR_AN_ENABLE) {
+ ap->link_time = 0;
+ ap->cur_time = 0;
+ ap->ability_match_cfg = 0;
+ ap->ability_match_count = 0;
+ ap->ability_match = 0;
+ ap->idle_match = 0;
+ ap->ack_match = 0;
+
+ ap->state = ANEG_STATE_RESTART_INIT;
+ } else {
+ ap->state = ANEG_STATE_DISABLE_LINK_OK;
+ }
+ break;
+
+ case ANEG_STATE_RESTART_INIT:
+ ap->link_time = ap->cur_time;
+ ap->flags &= ~(MR_NP_LOADED);
+ ap->txconfig = 0;
+ tw32(MAC_TX_AUTO_NEG, 0);
+ tp->mac_mode |= MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ ret = ANEG_TIMER_ENAB;
+ ap->state = ANEG_STATE_RESTART;
+
+ /* fallthru */
+ case ANEG_STATE_RESTART:
+ delta = ap->cur_time - ap->link_time;
+ if (delta > ANEG_STATE_SETTLE_TIME) {
+ ap->state = ANEG_STATE_ABILITY_DETECT_INIT;
+ } else {
+ ret = ANEG_TIMER_ENAB;
+ }
+ break;
+
+ case ANEG_STATE_DISABLE_LINK_OK:
+ ret = ANEG_DONE;
+ break;
+
+ case ANEG_STATE_ABILITY_DETECT_INIT:
+ ap->flags &= ~(MR_TOGGLE_TX);
+ ap->txconfig = (ANEG_CFG_FD | ANEG_CFG_PS1);
+ tw32(MAC_TX_AUTO_NEG, ap->txconfig);
+ tp->mac_mode |= MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ ap->state = ANEG_STATE_ABILITY_DETECT;
+ break;
+
+ case ANEG_STATE_ABILITY_DETECT:
+ if (ap->ability_match != 0 && ap->rxconfig != 0) {
+ ap->state = ANEG_STATE_ACK_DETECT_INIT;
+ }
+ break;
+
+ case ANEG_STATE_ACK_DETECT_INIT:
+ ap->txconfig |= ANEG_CFG_ACK;
+ tw32(MAC_TX_AUTO_NEG, ap->txconfig);
+ tp->mac_mode |= MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ ap->state = ANEG_STATE_ACK_DETECT;
+
+ /* fallthru */
+ case ANEG_STATE_ACK_DETECT:
+ if (ap->ack_match != 0) {
+ if ((ap->rxconfig & ~ANEG_CFG_ACK) ==
+ (ap->ability_match_cfg & ~ANEG_CFG_ACK)) {
+ ap->state = ANEG_STATE_COMPLETE_ACK_INIT;
+ } else {
+ ap->state = ANEG_STATE_AN_ENABLE;
+ }
+ } else if (ap->ability_match != 0 &&
+ ap->rxconfig == 0) {
+ ap->state = ANEG_STATE_AN_ENABLE;
+ }
+ break;
+
+ case ANEG_STATE_COMPLETE_ACK_INIT:
+ if (ap->rxconfig & ANEG_CFG_INVAL) {
+ ret = ANEG_FAILED;
+ break;
+ }
+ ap->flags &= ~(MR_LP_ADV_FULL_DUPLEX |
+ MR_LP_ADV_HALF_DUPLEX |
+ MR_LP_ADV_SYM_PAUSE |
+ MR_LP_ADV_ASYM_PAUSE |
+ MR_LP_ADV_REMOTE_FAULT1 |
+ MR_LP_ADV_REMOTE_FAULT2 |
+ MR_LP_ADV_NEXT_PAGE |
+ MR_TOGGLE_RX |
+ MR_NP_RX);
+ if (ap->rxconfig & ANEG_CFG_FD)
+ ap->flags |= MR_LP_ADV_FULL_DUPLEX;
+ if (ap->rxconfig & ANEG_CFG_HD)
+ ap->flags |= MR_LP_ADV_HALF_DUPLEX;
+ if (ap->rxconfig & ANEG_CFG_PS1)
+ ap->flags |= MR_LP_ADV_SYM_PAUSE;
+ if (ap->rxconfig & ANEG_CFG_PS2)
+ ap->flags |= MR_LP_ADV_ASYM_PAUSE;
+ if (ap->rxconfig & ANEG_CFG_RF1)
+ ap->flags |= MR_LP_ADV_REMOTE_FAULT1;
+ if (ap->rxconfig & ANEG_CFG_RF2)
+ ap->flags |= MR_LP_ADV_REMOTE_FAULT2;
+ if (ap->rxconfig & ANEG_CFG_NP)
+ ap->flags |= MR_LP_ADV_NEXT_PAGE;
+
+ ap->link_time = ap->cur_time;
+
+ ap->flags ^= (MR_TOGGLE_TX);
+ if (ap->rxconfig & 0x0008)
+ ap->flags |= MR_TOGGLE_RX;
+ if (ap->rxconfig & ANEG_CFG_NP)
+ ap->flags |= MR_NP_RX;
+ ap->flags |= MR_PAGE_RX;
+
+ ap->state = ANEG_STATE_COMPLETE_ACK;
+ ret = ANEG_TIMER_ENAB;
+ break;
+
+ case ANEG_STATE_COMPLETE_ACK:
+ if (ap->ability_match != 0 &&
+ ap->rxconfig == 0) {
+ ap->state = ANEG_STATE_AN_ENABLE;
+ break;
+ }
+ delta = ap->cur_time - ap->link_time;
+ if (delta > ANEG_STATE_SETTLE_TIME) {
+ if (!(ap->flags & (MR_LP_ADV_NEXT_PAGE))) {
+ ap->state = ANEG_STATE_IDLE_DETECT_INIT;
+ } else {
+ if ((ap->txconfig & ANEG_CFG_NP) == 0 &&
+ !(ap->flags & MR_NP_RX)) {
+ ap->state = ANEG_STATE_IDLE_DETECT_INIT;
+ } else {
+ ret = ANEG_FAILED;
+ }
+ }
+ }
+ break;
+
+ case ANEG_STATE_IDLE_DETECT_INIT:
+ ap->link_time = ap->cur_time;
+ tp->mac_mode &= ~MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ ap->state = ANEG_STATE_IDLE_DETECT;
+ ret = ANEG_TIMER_ENAB;
+ break;
+
+ case ANEG_STATE_IDLE_DETECT:
+ if (ap->ability_match != 0 &&
+ ap->rxconfig == 0) {
+ ap->state = ANEG_STATE_AN_ENABLE;
+ break;
+ }
+ delta = ap->cur_time - ap->link_time;
+ if (delta > ANEG_STATE_SETTLE_TIME) {
+ /* XXX another gem from the Broadcom driver :( */
+ ap->state = ANEG_STATE_LINK_OK;
+ }
+ break;
+
+ case ANEG_STATE_LINK_OK:
+ ap->flags |= (MR_AN_COMPLETE | MR_LINK_OK);
+ ret = ANEG_DONE;
+ break;
+
+ case ANEG_STATE_NEXT_PAGE_WAIT_INIT:
+ /* ??? unimplemented */
+ break;
+
+ case ANEG_STATE_NEXT_PAGE_WAIT:
+ /* ??? unimplemented */
+ break;
+
+ default:
+ ret = ANEG_FAILED;
+ break;
+	}
+
+ return ret;
+}
+
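+/* tg3_setup_fiber_phy() brings up the TBI (fiber) link. When
+ * autonegotiation is enabled it runs tg3_fiber_aneg_smachine()
+ * above in a polling loop until that returns ANEG_DONE or
+ * ANEG_FAILED. The state machine appears to follow the
+ * 1000BASE-X autonegotiation model (IEEE 802.3 clause 37),
+ * though the driver itself does not say so.
+ */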
+static int tg3_setup_fiber_phy(struct tg3 *tp)
+{
+ u32 orig_pause_cfg;
+ u16 orig_active_speed;
+ u8 orig_active_duplex;
+ int current_link_up;
+ int i;
+
+ orig_pause_cfg =
+ (tp->tg3_flags & (TG3_FLAG_RX_PAUSE |
+ TG3_FLAG_TX_PAUSE));
+ orig_active_speed = tp->link_config.active_speed;
+ orig_active_duplex = tp->link_config.active_duplex;
+
+ tp->mac_mode &= ~(MAC_MODE_PORT_MODE_MASK | MAC_MODE_HALF_DUPLEX);
+ tp->mac_mode |= MAC_MODE_PORT_MODE_TBI;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+	/* Reset when initializing for the first time, or when we have a link. */
+ if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) ||
+ (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED)) {
+ /* Set PLL lock range. */
+ tg3_writephy(tp, 0x16, 0x8007);
+
+ /* SW reset */
+ tg3_writephy(tp, MII_BMCR, BMCR_RESET);
+
+ /* Wait for reset to complete. */
+ /* XXX schedule_timeout() ... */
+ for (i = 0; i < 500; i++)
+ udelay(10);
+
+ /* Config mode; select PMA/Ch 1 regs. */
+ tg3_writephy(tp, 0x10, 0x8411);
+
+ /* Enable auto-lock and comdet, select txclk for tx. */
+ tg3_writephy(tp, 0x11, 0x0a10);
+
+ tg3_writephy(tp, 0x18, 0x00a0);
+ tg3_writephy(tp, 0x16, 0x41ff);
+
+ /* Assert and deassert POR. */
+ tg3_writephy(tp, 0x13, 0x0400);
+ udelay(40);
+ tg3_writephy(tp, 0x13, 0x0000);
+
+ tg3_writephy(tp, 0x11, 0x0a50);
+ udelay(40);
+ tg3_writephy(tp, 0x11, 0x0a10);
+
+ /* Wait for signal to stabilize */
+ /* XXX schedule_timeout() ... */
+ for (i = 0; i < 15000; i++)
+ udelay(10);
+
+ /* Deselect the channel register so we can read the PHYID
+ * later.
+ */
+ tg3_writephy(tp, 0x10, 0x8011);
+ }
+
+ /* Enable link change interrupt unless serdes polling. */
+ if (!(tp->tg3_flags & TG3_FLAG_POLL_SERDES))
+ tw32(MAC_EVENT, MAC_EVENT_LNKSTATE_CHANGED);
+ else
+ tw32(MAC_EVENT, 0);
+ tr32(MAC_EVENT);
+ udelay(40);
+
+ current_link_up = 0;
+ if (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) {
+ if (tp->link_config.autoneg == AUTONEG_ENABLE &&
+ !(tp->tg3_flags & TG3_FLAG_GOT_SERDES_FLOWCTL)) {
+ struct tg3_fiber_aneginfo aninfo;
+ int status = ANEG_FAILED;
+ unsigned int tick;
+ u32 tmp;
+
+ memset(&aninfo, 0, sizeof(aninfo));
+ aninfo.flags |= (MR_AN_ENABLE);
+
+ tw32(MAC_TX_AUTO_NEG, 0);
+
+ tmp = tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK;
+ tw32(MAC_MODE, tmp | MAC_MODE_PORT_MODE_GMII);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ tw32(MAC_MODE, tp->mac_mode | MAC_MODE_SEND_CONFIGS);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ aninfo.state = ANEG_STATE_UNKNOWN;
+ aninfo.cur_time = 0;
+ tick = 0;
+ while (++tick < 195000) {
+ status = tg3_fiber_aneg_smachine(tp, &aninfo);
+ if (status == ANEG_DONE ||
+ status == ANEG_FAILED)
+ break;
+
+ udelay(1);
+ }
+
+ tp->mac_mode &= ~MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ if (status == ANEG_DONE &&
+ (aninfo.flags &
+ (MR_AN_COMPLETE | MR_LINK_OK |
+ MR_LP_ADV_FULL_DUPLEX))) {
+ u32 local_adv, remote_adv;
+
+ local_adv = ADVERTISE_PAUSE_CAP;
+ remote_adv = 0;
+ if (aninfo.flags & MR_LP_ADV_SYM_PAUSE)
+ remote_adv |= LPA_PAUSE_CAP;
+ if (aninfo.flags & MR_LP_ADV_ASYM_PAUSE)
+ remote_adv |= LPA_PAUSE_ASYM;
+
+ tg3_setup_flow_control(tp, local_adv, remote_adv);
+
+ tp->tg3_flags |=
+ TG3_FLAG_GOT_SERDES_FLOWCTL;
+ current_link_up = 1;
+ }
+ for (i = 0; i < 60; i++) {
+ udelay(20);
+ tw32(MAC_STATUS,
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED));
+ tr32(MAC_STATUS);
+ udelay(40);
+ if ((tr32(MAC_STATUS) &
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED)) == 0)
+ break;
+ }
+ if (current_link_up == 0 &&
+ (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED)) {
+ current_link_up = 1;
+ }
+ } else {
+ /* Forcing 1000FD link up. */
+ current_link_up = 1;
+ }
+ }
+
+ tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ tp->hw_status->status =
+ (SD_STATUS_UPDATED |
+ (tp->hw_status->status & ~SD_STATUS_LINK_CHG));
+
+ for (i = 0; i < 100; i++) {
+ udelay(20);
+ tw32(MAC_STATUS,
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED));
+ tr32(MAC_STATUS);
+ udelay(40);
+ if ((tr32(MAC_STATUS) &
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED)) == 0)
+ break;
+ }
+
+ if ((tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) == 0)
+ current_link_up = 0;
+
+ if (current_link_up == 1) {
+ tp->link_config.active_speed = SPEED_1000;
+ tp->link_config.active_duplex = DUPLEX_FULL;
+ } else {
+ tp->link_config.active_speed = SPEED_INVALID;
+ tp->link_config.active_duplex = DUPLEX_INVALID;
+ }
+
+ if (current_link_up != netif_carrier_ok(tp->dev)) {
+ if (current_link_up)
+ netif_carrier_on(tp->dev);
+ else
+ netif_carrier_off(tp->dev);
+ tg3_link_report(tp);
+ } else {
+ u32 now_pause_cfg =
+ tp->tg3_flags & (TG3_FLAG_RX_PAUSE |
+ TG3_FLAG_TX_PAUSE);
+ if (orig_pause_cfg != now_pause_cfg ||
+ orig_active_speed != tp->link_config.active_speed ||
+ orig_active_duplex != tp->link_config.active_duplex)
+ tg3_link_report(tp);
+ }
+
+ if ((tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) == 0) {
+ tw32(MAC_MODE, tp->mac_mode | MAC_MODE_LINK_POLARITY);
+ tr32(MAC_MODE);
+ udelay(40);
+ if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) {
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+ }
+ }
+
+ return 0;
+}
+
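+/* Dispatch on PHY type, then widen the transmit slot time for
+ * half-duplex gigabit (0xff instead of the usual 32) via the
+ * MAC_TX_LENGTHS writes below.
+ */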
+static int tg3_setup_phy(struct tg3 *tp)
+{
+ int err;
+
+ if (tp->phy_id == PHY_ID_SERDES) {
+ err = tg3_setup_fiber_phy(tp);
+ } else {
+ err = tg3_setup_copper_phy(tp);
+ }
+
+ if (tp->link_config.active_speed == SPEED_1000 &&
+ tp->link_config.active_duplex == DUPLEX_HALF)
+ tw32(MAC_TX_LENGTHS,
+ ((2 << TX_LENGTHS_IPG_CRS_SHIFT) |
+ (6 << TX_LENGTHS_IPG_SHIFT) |
+ (0xff << TX_LENGTHS_SLOT_TIME_SHIFT)));
+ else
+ tw32(MAC_TX_LENGTHS,
+ ((2 << TX_LENGTHS_IPG_CRS_SHIFT) |
+ (6 << TX_LENGTHS_IPG_SHIFT) |
+ (32 << TX_LENGTHS_SLOT_TIME_SHIFT)));
+
+ return err;
+}
+
+/* Tigon3 never reports partial packet sends, so we do not
+ * need special logic to handle SKBs that have not had all
+ * of their frags sent yet, like SunGEM does.
+ */
+static void tg3_tx(struct tg3 *tp)
+{
+ u32 hw_idx = tp->hw_status->idx[0].tx_consumer;
+ u32 sw_idx = tp->tx_cons;
+
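+	/* hw_idx is the consumer index the chip last reported in the
+	 * status block; unmap and free every tx buffer between our
+	 * software consumer index and it.
+	 */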
+ while (sw_idx != hw_idx) {
+ struct tx_ring_info *ri = &tp->tx_buffers[sw_idx];
+ struct sk_buff *skb = ri->skb;
+ int i;
+
+ if (unlikely(skb == NULL))
+ BUG();
+
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(ri, mapping),
+ (skb->len - skb->data_len),
+ PCI_DMA_TODEVICE);
+
+ ri->skb = NULL;
+
+ sw_idx = NEXT_TX(sw_idx);
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ if (unlikely(sw_idx == hw_idx))
+ BUG();
+
+ ri = &tp->tx_buffers[sw_idx];
+ if (unlikely(ri->skb != NULL))
+ BUG();
+
+ pci_unmap_page(tp->pdev,
+ pci_unmap_addr(ri, mapping),
+ skb_shinfo(skb)->frags[i].size,
+ PCI_DMA_TODEVICE);
+
+ sw_idx = NEXT_TX(sw_idx);
+ }
+
+ dev_kfree_skb_irq(skb);
+ }
+
+ tp->tx_cons = sw_idx;
+
+ if (netif_queue_stopped(tp->dev) &&
+ (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH))
+ netif_wake_queue(tp->dev);
+}
+
+/* Returns size of skb allocated or < 0 on error.
+ *
+ * We only need to fill in the address because the other members
+ * of the RX descriptor are invariant, see tg3_init_rings.
+ *
+ * Note the purposeful asymmetry of cpu vs. chip accesses.  For
+ * posting buffers we only dirty the first cache line of the RX
+ * descriptor (containing the address). Whereas for the RX status
+ * buffers the cpu only reads the last cacheline of the RX descriptor
+ * (to fetch the error flags, vlan tag, checksum, and opaque cookie).
+ */
+static int tg3_alloc_rx_skb(struct tg3 *tp, u32 opaque_key,
+ int src_idx, u32 dest_idx_unmasked)
+{
+ struct tg3_rx_buffer_desc *desc;
+ struct ring_info *map, *src_map;
+ struct sk_buff *skb;
+ dma_addr_t mapping;
+ int skb_size, dest_idx;
+
+ src_map = NULL;
+ switch (opaque_key) {
+ case RXD_OPAQUE_RING_STD:
+ dest_idx = dest_idx_unmasked % TG3_RX_RING_SIZE;
+ desc = &tp->rx_std[dest_idx];
+ map = &tp->rx_std_buffers[dest_idx];
+ if (src_idx >= 0)
+ src_map = &tp->rx_std_buffers[src_idx];
+ skb_size = RX_PKT_BUF_SZ;
+ break;
+
+ case RXD_OPAQUE_RING_JUMBO:
+ dest_idx = dest_idx_unmasked % TG3_RX_JUMBO_RING_SIZE;
+ desc = &tp->rx_jumbo[dest_idx];
+ map = &tp->rx_jumbo_buffers[dest_idx];
+ if (src_idx >= 0)
+ src_map = &tp->rx_jumbo_buffers[src_idx];
+ skb_size = RX_JUMBO_PKT_BUF_SZ;
+ break;
+
+ default:
+ return -EINVAL;
+	}
+
+ /* Do not overwrite any of the map or rp information
+ * until we are sure we can commit to a new buffer.
+ *
+ * Callers depend upon this behavior and assume that
+ * we leave everything unchanged if we fail.
+ */
+ skb = dev_alloc_skb(skb_size);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ skb->dev = tp->dev;
+ skb_reserve(skb, tp->rx_offset);
+
+ mapping = pci_map_single(tp->pdev, skb->data,
+ skb_size - tp->rx_offset,
+ PCI_DMA_FROMDEVICE);
+
+ map->skb = skb;
+ pci_unmap_addr_set(map, mapping, mapping);
+
+ if (src_map != NULL)
+ src_map->skb = NULL;
+
+ desc->addr_hi = ((u64)mapping >> 32);
+ desc->addr_lo = ((u64)mapping & 0xffffffff);
+
+ return skb_size;
+}
+
+/* We only need to move over in the address because the other
+ * members of the RX descriptor are invariant. See notes above
+ * tg3_alloc_rx_skb for full details.
+ */
+static void tg3_recycle_rx(struct tg3 *tp, u32 opaque_key,
+ int src_idx, u32 dest_idx_unmasked)
+{
+ struct tg3_rx_buffer_desc *src_desc, *dest_desc;
+ struct ring_info *src_map, *dest_map;
+ int dest_idx;
+
+ switch (opaque_key) {
+ case RXD_OPAQUE_RING_STD:
+ dest_idx = dest_idx_unmasked % TG3_RX_RING_SIZE;
+ dest_desc = &tp->rx_std[dest_idx];
+ dest_map = &tp->rx_std_buffers[dest_idx];
+ src_desc = &tp->rx_std[src_idx];
+ src_map = &tp->rx_std_buffers[src_idx];
+ break;
+
+ case RXD_OPAQUE_RING_JUMBO:
+ dest_idx = dest_idx_unmasked % TG3_RX_JUMBO_RING_SIZE;
+ dest_desc = &tp->rx_jumbo[dest_idx];
+ dest_map = &tp->rx_jumbo_buffers[dest_idx];
+ src_desc = &tp->rx_jumbo[src_idx];
+ src_map = &tp->rx_jumbo_buffers[src_idx];
+ break;
+
+ default:
+ return;
+	}
+
+ dest_map->skb = src_map->skb;
+ pci_unmap_addr_set(dest_map, mapping,
+ pci_unmap_addr(src_map, mapping));
+ dest_desc->addr_hi = src_desc->addr_hi;
+ dest_desc->addr_lo = src_desc->addr_lo;
+
+ src_map->skb = NULL;
+}
+
+#if TG3_VLAN_TAG_USED
+static int tg3_vlan_rx(struct tg3 *tp, struct sk_buff *skb, u16 vlan_tag)
+{
+ return vlan_hwaccel_receive_skb(skb, tp->vlgrp, vlan_tag);
+}
+#endif
+
+/* The RX ring scheme is composed of multiple rings which post fresh
+ * buffers to the chip, and one special ring the chip uses to report
+ * status back to the host.
+ *
+ * The special ring reports the status of received packets to the
+ * host. The chip does not write into the original descriptor the
+ * RX buffer was obtained from. The chip simply takes the original
+ * descriptor as provided by the host, updates the status and length
+ * field, then writes this into the next status ring entry.
+ *
+ * Each ring the host uses to post buffers to the chip is described
+ * by a TG3_BDINFO entry in the chip's SRAM area.  When a packet arrives,
+ * it is first placed into the on-chip ram.  Once the packet's length
+ * is known, the chip walks down the TG3_BDINFO entries to select the
+ * ring: each TG3_BDINFO specifies a MAXLEN field, and the first entry
+ * whose MAXLEN covers the new packet's length is chosen.
+ *
+ * The "separate ring for rx status" scheme may sound queer, but it makes
+ * sense from a cache coherency perspective. If only the host writes
+ * to the buffer post rings, and only the chip writes to the rx status
+ * rings, then cache lines never move beyond shared-modified state.
+ * If both the host and chip were to write into the same ring, cache line
+ * eviction could occur since both entities want it in an exclusive state.
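+ *
+ * Roughly (a sketch, not a register-level diagram):
+ *
+ *   host --> std posting ring ---\
+ *   host --> jumbo posting ring --+--> chip --> status ring --> host
+ *
+ * The host is the only writer of the posting rings; the chip is
+ * the only writer of the status ring.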
+ */
+static int tg3_rx(struct tg3 *tp, int budget)
+{
+ u32 work_mask;
+ u32 rx_rcb_ptr = tp->rx_rcb_ptr;
+ u16 hw_idx, sw_idx;
+ int received;
+
+ hw_idx = tp->hw_status->idx[0].rx_producer;
+ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE;
+ work_mask = 0;
+ received = 0;
+ while (sw_idx != hw_idx && budget > 0) {
+ struct tg3_rx_buffer_desc *desc = &tp->rx_rcb[sw_idx];
+ unsigned int len;
+ struct sk_buff *skb;
+ dma_addr_t dma_addr;
+ u32 opaque_key, desc_idx, *post_ptr;
+
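+		/* The opaque field is the cookie we wrote into the
+		 * descriptor at posting time: the ring selector lives in
+		 * the high bits and the posting-ring index in the low bits.
+		 */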
+ desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK;
+ opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK;
+ if (opaque_key == RXD_OPAQUE_RING_STD) {
+ dma_addr = pci_unmap_addr(&tp->rx_std_buffers[desc_idx],
+ mapping);
+ skb = tp->rx_std_buffers[desc_idx].skb;
+ post_ptr = &tp->rx_std_ptr;
+ } else if (opaque_key == RXD_OPAQUE_RING_JUMBO) {
+ dma_addr = pci_unmap_addr(&tp->rx_jumbo_buffers[desc_idx],
+ mapping);
+ skb = tp->rx_jumbo_buffers[desc_idx].skb;
+ post_ptr = &tp->rx_jumbo_ptr;
+ }
+ else {
+ goto next_pkt_nopost;
+ }
+
+ work_mask |= opaque_key;
+
+ if ((desc->err_vlan & RXD_ERR_MASK) != 0 &&
+ (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII)) {
+ drop_it:
+ tg3_recycle_rx(tp, opaque_key,
+ desc_idx, *post_ptr);
+ drop_it_no_recycle:
+			/* The card keeps track of the other statistics. */
+ tp->net_stats.rx_dropped++;
+ goto next_pkt;
+ }
+
+ len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4; /* omit crc */
+
+ if (len > RX_COPY_THRESHOLD) {
+ int skb_size;
+
+ skb_size = tg3_alloc_rx_skb(tp, opaque_key,
+ desc_idx, *post_ptr);
+ if (skb_size < 0)
+ goto drop_it;
+
+ pci_unmap_single(tp->pdev, dma_addr,
+ skb_size - tp->rx_offset,
+ PCI_DMA_FROMDEVICE);
+
+ skb_put(skb, len);
+ } else {
+ struct sk_buff *copy_skb;
+
+ tg3_recycle_rx(tp, opaque_key,
+ desc_idx, *post_ptr);
+
+ copy_skb = dev_alloc_skb(len + 2);
+ if (copy_skb == NULL)
+ goto drop_it_no_recycle;
+
+ copy_skb->dev = tp->dev;
+ skb_reserve(copy_skb, 2);
+ skb_put(copy_skb, len);
+ pci_dma_sync_single(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
+ memcpy(copy_skb->data, skb->data, len);
+
+ /* We'll reuse the original ring buffer. */
+ skb = copy_skb;
+ }
+
+ if ((tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) &&
+ (desc->type_flags & RXD_FLAG_TCPUDP_CSUM)) {
+ skb->csum = htons((desc->ip_tcp_csum & RXD_TCPCSUM_MASK)
+ >> RXD_TCPCSUM_SHIFT);
+ skb->ip_summed = CHECKSUM_HW;
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
+ skb->protocol = eth_type_trans(skb, tp->dev);
+#if TG3_VLAN_TAG_USED
+ if (tp->vlgrp != NULL &&
+ desc->type_flags & RXD_FLAG_VLAN) {
+ tg3_vlan_rx(tp, skb,
+ desc->err_vlan & RXD_VLAN_MASK);
+ } else
+#endif
+#ifdef NAPI
+ netif_receive_skb(skb);
+#else
+ netif_rx(skb);
+#endif
+ tp->dev->last_rx = jiffies;
+ received++;
+ budget--;
+
+next_pkt:
+ (*post_ptr)++;
+next_pkt_nopost:
+ rx_rcb_ptr++;
+ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE;
+ }
+
+ /* ACK the status ring. */
+ tp->rx_rcb_ptr = rx_rcb_ptr;
+ tw32_mailbox(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW,
+ (rx_rcb_ptr % TG3_RX_RCB_RING_SIZE));
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW);
+
+ /* Refill RX ring(s). */
+ if (work_mask & RXD_OPAQUE_RING_STD) {
+ sw_idx = tp->rx_std_ptr % TG3_RX_RING_SIZE;
+ tw32_mailbox(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW,
+ sw_idx);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW);
+ }
+ if (work_mask & RXD_OPAQUE_RING_JUMBO) {
+ sw_idx = tp->rx_jumbo_ptr % TG3_RX_JUMBO_RING_SIZE;
+ tw32_mailbox(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW,
+ sw_idx);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW);
+ }
+
+ return received;
+}
+
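+/* tg3_poll() is compiled two ways: with NAPI defined it is the
+ * ->poll handler and honors the budget; without NAPI it is called
+ * directly from the interrupt handler with a large fixed budget.
+ */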
+static int tg3_poll(struct net_device *netdev, int *budget)
+{
+ struct tg3 *tp = netdev->priv;
+ struct tg3_hw_status *sblk = tp->hw_status;
+ int done;
+#ifdef NAPI
+ unsigned long flags;
+ spin_lock_irqsave(&tp->lock, flags);
+#endif
+ if (!(tp->tg3_flags &
+ (TG3_FLAG_USE_LINKCHG_REG |
+ TG3_FLAG_POLL_SERDES))) {
+ if (sblk->status & SD_STATUS_LINK_CHG) {
+ sblk->status = SD_STATUS_UPDATED |
+ (sblk->status & ~SD_STATUS_LINK_CHG);
+ tg3_setup_phy(tp);
+ }
+ }
+
+ if (sblk->idx[0].tx_consumer != tp->tx_cons) {
+ spin_lock(&tp->tx_lock);
+ tg3_tx(tp);
+ spin_unlock(&tp->tx_lock);
+ }
+
+ done = 1;
+ if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr) {
+ int work_done;
+#ifdef NAPI
+ int orig_budget = *budget;
+ if (orig_budget > netdev->quota)
+ orig_budget = netdev->quota;
+
+ work_done = tg3_rx(tp, orig_budget);
+
+ *budget -= work_done;
+ netdev->quota -= work_done;
+
+ if (work_done >= orig_budget)
+ done = 0;
+#else
+ work_done = tg3_rx(tp, 1000);
+#endif
+ }
+#ifdef NAPI
+ if (done) {
+ netif_rx_complete(netdev);
+ tg3_enable_ints(tp);
+ }
+
+ spin_unlock_irqrestore(&tp->lock, flags);
+#endif
+ return (done ? 0 : 1);
+}
+
+static inline unsigned int tg3_has_work(struct net_device *dev, struct tg3 *tp)
+{
+ struct tg3_hw_status *sblk = tp->hw_status;
+ unsigned int work_exists = 0;
+
+ if (!(tp->tg3_flags &
+ (TG3_FLAG_USE_LINKCHG_REG |
+ TG3_FLAG_POLL_SERDES))) {
+ if (sblk->status & SD_STATUS_LINK_CHG)
+ work_exists = 1;
+ }
+ if (sblk->idx[0].tx_consumer != tp->tx_cons ||
+ sblk->idx[0].rx_producer != tp->rx_rcb_ptr)
+ work_exists = 1;
+
+ return work_exists;
+}
+
+static void tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct net_device *dev = dev_id;
+ struct tg3 *tp = dev->priv;
+#ifdef NAPI
+ struct tg3_hw_status *sblk = tp->hw_status;
+#endif
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->lock, flags);
+#ifdef NAPI
+ if (sblk->status & SD_STATUS_UPDATED) {
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+ 0x00000001);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ sblk->status &= ~SD_STATUS_UPDATED;
+
+ if (likely(tg3_has_work(dev, tp)))
+ netif_rx_schedule(dev);
+ else {
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+ 0x00000000);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ }
+ }
+#else
+ {
+ int budget = 1000;
+		tg3_poll(dev, &budget);
+ }
+#endif
+
+ spin_unlock_irqrestore(&tp->lock, flags);
+}
+
+static void tg3_init_rings(struct tg3 *);
+static int tg3_init_hw(struct tg3 *);
+
+static void tg3_tx_timeout(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+
+ printk(KERN_ERR PFX "%s: transmit timed out, resetting\n",
+ dev->name);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_halt(tp);
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ netif_wake_queue(dev);
+}
+
+#if !PCI_DMA_BUS_IS_PHYS
+static void tg3_set_txd_addr(struct tg3 *tp, int entry, dma_addr_t mapping)
+{
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ struct tg3_tx_buffer_desc *txd = &tp->tx_ring[entry];
+
+ txd->addr_hi = ((u64) mapping >> 32);
+ txd->addr_lo = ((u64) mapping & 0xffffffff);
+ } else {
+ unsigned long txd;
+
+ txd = (tp->regs +
+ NIC_SRAM_WIN_BASE +
+ NIC_SRAM_TX_BUFFER_DESC);
+ txd += (entry * TXD_SIZE);
+
+ if (sizeof(dma_addr_t) != sizeof(u32))
+ writel(((u64) mapping >> 32),
+ txd + TXD_ADDR + TG3_64BIT_REG_HIGH);
+
+ writel(((u64) mapping & 0xffffffff),
+ txd + TXD_ADDR + TG3_64BIT_REG_LOW);
+ }
+}
+#endif
+
+static void tg3_set_txd(struct tg3 *, int, dma_addr_t, int, u32, u32);
+
+static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
+ u32 guilty_entry, int guilty_len,
+ u32 last_plus_one, u32 *start, u32 mss)
+{
+ dma_addr_t new_addr;
+ u32 entry = *start;
+ int i;
+
+#if !PCI_DMA_BUS_IS_PHYS
+	/* With an IOMMU, just map the guilty area again; the new
+	 * mapping is guaranteed to use different bus addresses.
+	 */
+
+ i = 0;
+ while (entry != guilty_entry) {
+ entry = NEXT_TX(entry);
+ i++;
+ }
+ if (i == 0) {
+ new_addr = pci_map_single(tp->pdev, skb->data, guilty_len,
+ PCI_DMA_TODEVICE);
+ } else {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+
+ new_addr = pci_map_page(tp->pdev,
+ frag->page, frag->page_offset,
+ guilty_len, PCI_DMA_TODEVICE);
+ }
+ pci_unmap_single(tp->pdev, pci_unmap_addr(&tp->tx_buffers[guilty_entry],
+ mapping),
+ guilty_len, PCI_DMA_TODEVICE);
+ tg3_set_txd_addr(tp, guilty_entry, new_addr);
+ pci_unmap_addr_set(&tp->tx_buffers[guilty_entry], mapping,
+ new_addr);
+ *start = last_plus_one;
+#else
+ /* Oh well, no IOMMU, have to allocate a whole new SKB. */
+ struct sk_buff *new_skb = skb_copy(skb, GFP_ATOMIC);
+
+ if (!new_skb) {
+ dev_kfree_skb(skb);
+ return -1;
+ }
+
+	/* NOTE: Broadcom's driver botches this case up really badly.
+ * This is especially true if any of the frag pages
+ * are in highmem. It will instantly oops in that case.
+ */
+
+	/* New SKB is guaranteed to be linear. */
+ entry = *start;
+ new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
+ PCI_DMA_TODEVICE);
+ tg3_set_txd(tp, entry, new_addr, new_skb->len,
+ (skb->ip_summed == CHECKSUM_HW) ?
+ TXD_FLAG_TCPUDP_CSUM : 0, 1 | (mss << 1));
+ *start = NEXT_TX(entry);
+
+ /* Now clean up the sw ring entries. */
+ i = 0;
+ while (entry != last_plus_one) {
+ int len;
+
+ if (i == 0)
+ len = skb->len - skb->data_len;
+ else
+ len = skb_shinfo(skb)->frags[i-1].size;
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(&tp->tx_buffers[entry], mapping),
+ len, PCI_DMA_TODEVICE);
+ if (i == 0) {
+ tp->tx_buffers[entry].skb = new_skb;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, new_addr);
+ } else {
+ tp->tx_buffers[entry].skb = NULL;
+ }
+ entry = NEXT_TX(entry);
+ }
+
+ dev_kfree_skb(skb);
+#endif
+
+ return 0;
+}
+
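+/* mss_and_is_end packs two values: bit 0 is the "last fragment"
+ * flag and bits 31:1 carry the TSO MSS, matching the
+ * "(i == last) | (mss << 1)" expressions at the call sites.
+ */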
+static void tg3_set_txd(struct tg3 *tp, int entry,
+ dma_addr_t mapping, int len, u32 flags,
+ u32 mss_and_is_end)
+{
+ int is_end = (mss_and_is_end & 0x1);
+ u32 mss = (mss_and_is_end >> 1);
+ u32 vlan_tag = 0;
+
+ if (is_end)
+ flags |= TXD_FLAG_END;
+ if (flags & TXD_FLAG_VLAN) {
+ vlan_tag = flags >> 16;
+ flags &= 0xffff;
+ }
+ vlan_tag |= (mss << TXD_MSS_SHIFT);
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ struct tg3_tx_buffer_desc *txd = &tp->tx_ring[entry];
+
+ txd->addr_hi = ((u64) mapping >> 32);
+ txd->addr_lo = ((u64) mapping & 0xffffffff);
+ txd->len_flags = (len << TXD_LEN_SHIFT) | flags;
+ txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
+ } else {
+ struct tx_ring_info *txr = &tp->tx_buffers[entry];
+ unsigned long txd;
+
+ txd = (tp->regs +
+ NIC_SRAM_WIN_BASE +
+ NIC_SRAM_TX_BUFFER_DESC);
+ txd += (entry * TXD_SIZE);
+
+ /* Save some PIOs */
+ if (sizeof(dma_addr_t) != sizeof(u32))
+ writel(((u64) mapping >> 32),
+ txd + TXD_ADDR + TG3_64BIT_REG_HIGH);
+
+ writel(((u64) mapping & 0xffffffff),
+ txd + TXD_ADDR + TG3_64BIT_REG_LOW);
+ writel(len << TXD_LEN_SHIFT | flags, txd + TXD_LEN_FLAGS);
+ if (txr->prev_vlan_tag != vlan_tag) {
+ writel(vlan_tag << TXD_VLAN_TAG_SHIFT, txd + TXD_VLAN_TAG);
+ txr->prev_vlan_tag = vlan_tag;
+ }
+ }
+}
+
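+/* Test for the (apparent) hardware erratum worked around above: a
+ * TX buffer whose last byte would wrap past a 4GB boundary.  True
+ * when the high 32 bits of the address are zero, the low 32 bits
+ * sit in the last ~9KB below 4GB, and base + len + 8 wraps around;
+ * the 8 bytes of slack are presumably chip prefetch margin.
+ */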
+static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
+{
+ u32 base = (u32) mapping & 0xffffffff;
+
+ return ((base > 0xffffdcc0) &&
+ ((u64) mapping >> 32) == 0 &&
+ (base + len + 8 < base));
+}
+
+static int tg3_start_xmit_4gbug(struct sk_buff *skb, struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ dma_addr_t mapping;
+ unsigned int i;
+ u32 len, entry, base_flags, mss;
+ int would_hit_hwbug;
+ unsigned long flags;
+
+ len = (skb->len - skb->data_len);
+
+ /* No BH disabling for tx_lock here. We are running in BH disabled
+ * context and TX reclaim runs via tp->poll inside of a software
+ * interrupt. Rejoice!
+ *
+ * Actually, things are not so simple. If we are to take a hw
+ * IRQ here, we can deadlock, consider:
+ *
+ * CPU1 CPU2
+ * tg3_start_xmit
+ * take tp->tx_lock
+ * tg3_timer
+ * take tp->lock
+ * tg3_interrupt
+ * spin on tp->lock
+ * spin on tp->tx_lock
+ *
+ * So we really do need to disable interrupts when taking
+ * tx_lock here.
+ */
+ spin_lock_irqsave(&tp->tx_lock, flags);
+
+ /* This is a hard error, log it. */
+ if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
+ netif_stop_queue(dev);
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+ printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
+ dev->name);
+ return 1;
+ }
+
+ entry = tp->tx_prod;
+ base_flags = 0;
+ if (skb->ip_summed == CHECKSUM_HW)
+ base_flags |= TXD_FLAG_TCPUDP_CSUM;
+#if TG3_DO_TSO != 0
+ if ((mss = skb_shinfo(skb)->tso_size) != 0)
+ base_flags |= (TXD_FLAG_CPU_PRE_DMA |
+ TXD_FLAG_CPU_POST_DMA);
+#else
+ mss = 0;
+#endif
+#if TG3_VLAN_TAG_USED
+ if (tp->vlgrp != NULL && vlan_tx_tag_present(skb))
+ base_flags |= (TXD_FLAG_VLAN |
+ (vlan_tx_tag_get(skb) << 16));
+#endif
+
+ /* Queue skb data, a.k.a. the main skb fragment. */
+ mapping = pci_map_single(tp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+
+ tp->tx_buffers[entry].skb = skb;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
+
+ would_hit_hwbug = 0;
+
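+	/* would_hit_hwbug stores (guilty entry + 1) so that zero can
+	 * mean "no hit".
+	 */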
+ if (tg3_4g_overflow_test(mapping, len))
+ would_hit_hwbug = entry + 1;
+
+ tg3_set_txd(tp, entry, mapping, len, base_flags,
+ (skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
+
+ entry = NEXT_TX(entry);
+
+ /* Now loop through additional data fragments, and queue them. */
+ if (skb_shinfo(skb)->nr_frags > 0) {
+ unsigned int i, last;
+
+ last = skb_shinfo(skb)->nr_frags - 1;
+ for (i = 0; i <= last; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ len = frag->size;
+ mapping = pci_map_page(tp->pdev,
+ frag->page,
+ frag->page_offset,
+ len, PCI_DMA_TODEVICE);
+
+ tp->tx_buffers[entry].skb = NULL;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
+
+ if (tg3_4g_overflow_test(mapping, len)) {
+ /* Only one should match. */
+ if (would_hit_hwbug)
+ BUG();
+ would_hit_hwbug = entry + 1;
+ }
+
+ tg3_set_txd(tp, entry, mapping, len,
+ base_flags, (i == last) | (mss << 1));
+
+ entry = NEXT_TX(entry);
+ }
+ }
+
+ if (would_hit_hwbug) {
+ u32 last_plus_one = entry;
+ u32 start;
+ unsigned int len = 0;
+
+ would_hit_hwbug -= 1;
+ entry = entry - 1 - skb_shinfo(skb)->nr_frags;
+ entry &= (TG3_TX_RING_SIZE - 1);
+ start = entry;
+ i = 0;
+ while (entry != last_plus_one) {
+ if (i == 0)
+ len = skb->len - skb->data_len;
+ else
+ len = skb_shinfo(skb)->frags[i-1].size;
+
+ if (entry == would_hit_hwbug)
+ break;
+
+ i++;
+ entry = NEXT_TX(entry);
+		}
+
+ /* If the workaround fails due to memory/mapping
+ * failure, silently drop this packet.
+ */
+ if (tigon3_4gb_hwbug_workaround(tp, skb,
+ entry, len,
+ last_plus_one,
+ &start, mss))
+ goto out_unlock;
+
+ entry = start;
+ }
+
+	/* Packets are ready, update the Tx producer index locally and on the card. */
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ tw32_mailbox((MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+ if (tp->tg3_flags & TG3_FLAG_TXD_MBOX_HWBUG)
+ tw32_mailbox((MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+ } else {
+ /* First, make sure tg3 sees last descriptor fully
+ * in SRAM.
+ */
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+
+ tw32_mailbox((MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+ if (tp->tg3_flags & TG3_FLAG_TXD_MBOX_HWBUG)
+ tw32_mailbox((MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+
+ /* Now post the mailbox write itself. */
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+ }
+
+ tp->tx_prod = entry;
+ if (TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))
+ netif_stop_queue(dev);
+
+out_unlock:
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+
+ dev->trans_start = jiffies;
+
+ return 0;
+}
+
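+/* Fast-path version of the above for chips without the 4GB
+ * boundary erratum: same flow, minus the overflow tests and the
+ * double mailbox write (see the comment near the producer update
+ * below).
+ */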
+static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ dma_addr_t mapping;
+ u32 len, entry, base_flags, mss;
+ unsigned long flags;
+
+ len = (skb->len - skb->data_len);
+
+ /* No BH disabling for tx_lock here. We are running in BH disabled
+ * context and TX reclaim runs via tp->poll inside of a software
+ * interrupt. Rejoice!
+ *
+ * Actually, things are not so simple. If we are to take a hw
+ * IRQ here, we can deadlock, consider:
+ *
+ * CPU1 CPU2
+ * tg3_start_xmit
+ * take tp->tx_lock
+ * tg3_timer
+ * take tp->lock
+ * tg3_interrupt
+ * spin on tp->lock
+ * spin on tp->tx_lock
+ *
+ * So we really do need to disable interrupts when taking
+ * tx_lock here.
+ */
+ spin_lock_irqsave(&tp->tx_lock, flags);
+
+ /* This is a hard error, log it. */
+ if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
+ netif_stop_queue(dev);
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+ printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
+ dev->name);
+ return 1;
+ }
+
+ entry = tp->tx_prod;
+ base_flags = 0;
+ if (skb->ip_summed == CHECKSUM_HW)
+ base_flags |= TXD_FLAG_TCPUDP_CSUM;
+#if TG3_DO_TSO != 0
+ if ((mss = skb_shinfo(skb)->tso_size) != 0)
+ base_flags |= (TXD_FLAG_CPU_PRE_DMA |
+ TXD_FLAG_CPU_POST_DMA);
+#else
+ mss = 0;
+#endif
+#if TG3_VLAN_TAG_USED
+ if (tp->vlgrp != NULL && vlan_tx_tag_present(skb))
+ base_flags |= (TXD_FLAG_VLAN |
+ (vlan_tx_tag_get(skb) << 16));
+#endif
+
+ /* Queue skb data, a.k.a. the main skb fragment. */
+ mapping = pci_map_single(tp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+
+ tp->tx_buffers[entry].skb = skb;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
+
+ tg3_set_txd(tp, entry, mapping, len, base_flags,
+ (skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
+
+ entry = NEXT_TX(entry);
+
+ /* Now loop through additional data fragments, and queue them. */
+ if (skb_shinfo(skb)->nr_frags > 0) {
+ unsigned int i, last;
+
+ last = skb_shinfo(skb)->nr_frags - 1;
+ for (i = 0; i <= last; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ len = frag->size;
+ mapping = pci_map_page(tp->pdev,
+ frag->page,
+ frag->page_offset,
+ len, PCI_DMA_TODEVICE);
+
+ tp->tx_buffers[entry].skb = NULL;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
+
+ tg3_set_txd(tp, entry, mapping, len,
+ base_flags, (i == last) | (mss << 1));
+
+ entry = NEXT_TX(entry);
+ }
+ }
+
+	/* Packets are ready, update the Tx producer index locally and on the card.
+ * We know this is not a 5700 (by virtue of not being a chip
+ * requiring the 4GB overflow workaround) so we can safely omit
+ * the double-write bug tests.
+ */
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ tw32_mailbox((MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+ } else {
+ /* First, make sure tg3 sees last descriptor fully
+ * in SRAM.
+ */
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+
+ tw32_mailbox((MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+
+ /* Now post the mailbox write itself. */
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+ }
+
+ tp->tx_prod = entry;
+ if (TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))
+ netif_stop_queue(dev);
+
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+
+ dev->trans_start = jiffies;
+
+ return 0;
+}
+
+static inline void tg3_set_mtu(struct net_device *dev, struct tg3 *tp,
+ int new_mtu)
+{
+ dev->mtu = new_mtu;
+
+ if (new_mtu > ETH_DATA_LEN)
+ tp->tg3_flags |= TG3_FLAG_JUMBO_ENABLE;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_JUMBO_ENABLE;
+}
+
+static int tg3_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct tg3 *tp = dev->priv;
+
+ if (new_mtu < TG3_MIN_MTU || new_mtu > TG3_MAX_MTU)
+ return -EINVAL;
+
+ if (!netif_running(dev)) {
+		/* We'll just catch it later when the
+		 * device is brought up.
+		 */
+ tg3_set_mtu(dev, tp, new_mtu);
+ return 0;
+ }
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_halt(tp);
+
+ tg3_set_mtu(dev, tp, new_mtu);
+
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+}
+
+/* Free up pending packets in all rx/tx rings.
+ *
+ * The chip has been shut down and the driver detached from
+ * the networking, so no interrupts or new tx packets will
+ * end up in the driver. tp->{tx,}lock is not held and we are not
+ * in an interrupt context and thus may sleep.
+ */
+static void tg3_free_rings(struct tg3 *tp)
+{
+ struct ring_info *rxp;
+ int i;
+
+ for (i = 0; i < TG3_RX_RING_SIZE; i++) {
+ rxp = &tp->rx_std_buffers[i];
+
+ if (rxp->skb == NULL)
+ continue;
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(rxp, mapping),
+ RX_PKT_BUF_SZ - tp->rx_offset,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb_any(rxp->skb);
+ rxp->skb = NULL;
+ }
+
+ for (i = 0; i < TG3_RX_JUMBO_RING_SIZE; i++) {
+ rxp = &tp->rx_jumbo_buffers[i];
+
+ if (rxp->skb == NULL)
+ continue;
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(rxp, mapping),
+ RX_JUMBO_PKT_BUF_SZ - tp->rx_offset,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb_any(rxp->skb);
+ rxp->skb = NULL;
+ }
+
+ for (i = 0; i < TG3_TX_RING_SIZE; ) {
+ struct tx_ring_info *txp;
+ struct sk_buff *skb;
+ int j;
+
+ txp = &tp->tx_buffers[i];
+ skb = txp->skb;
+
+ if (skb == NULL) {
+ i++;
+ continue;
+ }
+
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(txp, mapping),
+ (skb->len - skb->data_len),
+ PCI_DMA_TODEVICE);
+ txp->skb = NULL;
+
+ i++;
+
+ for (j = 0; j < skb_shinfo(skb)->nr_frags; j++) {
+ txp = &tp->tx_buffers[i & (TG3_TX_RING_SIZE - 1)];
+ pci_unmap_page(tp->pdev,
+ pci_unmap_addr(txp, mapping),
+ skb_shinfo(skb)->frags[j].size,
+ PCI_DMA_TODEVICE);
+ i++;
+ }
+
+ dev_kfree_skb_any(skb);
+ }
+}
+
+/* Initialize tx/rx rings for packet processing.
+ *
+ * The chip has been shut down and the driver detached from
+ * the networking, so no interrupts or new tx packets will
+ * end up in the driver. tp->{tx,}lock is not held and we are not
+ * in an interrupt context and thus may sleep.
+ */
+static void tg3_init_rings(struct tg3 *tp)
+{
+ unsigned long start, end;
+ u32 i;
+
+ /* Free up all the SKBs. */
+ tg3_free_rings(tp);
+
+ /* Zero out all descriptors. */
+ memset(tp->rx_std, 0, TG3_RX_RING_BYTES);
+ memset(tp->rx_jumbo, 0, TG3_RX_JUMBO_RING_BYTES);
+ memset(tp->rx_rcb, 0, TG3_RX_RCB_RING_BYTES);
+
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ memset(tp->tx_ring, 0, TG3_TX_RING_BYTES);
+ } else {
+ start = (tp->regs +
+ NIC_SRAM_WIN_BASE +
+ NIC_SRAM_TX_BUFFER_DESC);
+ end = start + TG3_TX_RING_BYTES;
+ while (start < end) {
+ writel(0, start);
+ start += 4;
+ }
+ for (i = 0; i < TG3_TX_RING_SIZE; i++)
+ tp->tx_buffers[i].prev_vlan_tag = 0;
+ }
+
+	/* Initialize invariants of the rings; we only set this
+ * stuff once. This works because the card does not
+ * write into the rx buffer posting rings.
+ */
+ for (i = 0; i < TG3_RX_RING_SIZE; i++) {
+ struct tg3_rx_buffer_desc *rxd;
+
+ rxd = &tp->rx_std[i];
+ rxd->idx_len = (RX_PKT_BUF_SZ - tp->rx_offset - 64)
+ << RXD_LEN_SHIFT;
+ rxd->type_flags = (RXD_FLAG_END << RXD_FLAGS_SHIFT);
+ rxd->opaque = (RXD_OPAQUE_RING_STD |
+ (i << RXD_OPAQUE_INDEX_SHIFT));
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE) {
+ for (i = 0; i < TG3_RX_JUMBO_RING_SIZE; i++) {
+ struct tg3_rx_buffer_desc *rxd;
+
+ rxd = &tp->rx_jumbo[i];
+ rxd->idx_len = (RX_JUMBO_PKT_BUF_SZ - tp->rx_offset - 64)
+ << RXD_LEN_SHIFT;
+ rxd->type_flags = (RXD_FLAG_END << RXD_FLAGS_SHIFT) |
+ RXD_FLAG_JUMBO;
+ rxd->opaque = (RXD_OPAQUE_RING_JUMBO |
+ (i << RXD_OPAQUE_INDEX_SHIFT));
+ }
+ }
+
+ /* Now allocate fresh SKBs for each rx ring. */
+ for (i = 0; i < tp->rx_pending; i++) {
+ if (tg3_alloc_rx_skb(tp, RXD_OPAQUE_RING_STD,
+ -1, i) < 0)
+ break;
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE) {
+ for (i = 0; i < tp->rx_jumbo_pending; i++) {
+ if (tg3_alloc_rx_skb(tp, RXD_OPAQUE_RING_JUMBO,
+ -1, i) < 0)
+ break;
+ }
+ }
+}
+
+/*
+ * Must not be invoked with interrupt sources disabled; the
+ * hardware must already be shut down.
+ */
+static void tg3_free_consistent(struct tg3 *tp)
+{
+ if (tp->rx_std_buffers) {
+ kfree(tp->rx_std_buffers);
+ tp->rx_std_buffers = NULL;
+ }
+ if (tp->rx_std) {
+ pci_free_consistent(tp->pdev, TG3_RX_RING_BYTES,
+ tp->rx_std, tp->rx_std_mapping);
+ tp->rx_std = NULL;
+ }
+ if (tp->rx_jumbo) {
+ pci_free_consistent(tp->pdev, TG3_RX_JUMBO_RING_BYTES,
+ tp->rx_jumbo, tp->rx_jumbo_mapping);
+ tp->rx_jumbo = NULL;
+ }
+ if (tp->rx_rcb) {
+ pci_free_consistent(tp->pdev, TG3_RX_RCB_RING_BYTES,
+ tp->rx_rcb, tp->rx_rcb_mapping);
+ tp->rx_rcb = NULL;
+ }
+ if (tp->tx_ring) {
+ pci_free_consistent(tp->pdev, TG3_TX_RING_BYTES,
+ tp->tx_ring, tp->tx_desc_mapping);
+ tp->tx_ring = NULL;
+ }
+ if (tp->hw_status) {
+ pci_free_consistent(tp->pdev, TG3_HW_STATUS_SIZE,
+ tp->hw_status, tp->status_mapping);
+ tp->hw_status = NULL;
+ }
+ if (tp->hw_stats) {
+ pci_free_consistent(tp->pdev, sizeof(struct tg3_hw_stats),
+ tp->hw_stats, tp->stats_mapping);
+ tp->hw_stats = NULL;
+ }
+}
+
+/*
+ * Must not be invoked with interrupt sources disabled; the
+ * hardware must already be shut down.  Can sleep.
+ */
+static int tg3_alloc_consistent(struct tg3 *tp)
+{
+ tp->rx_std_buffers = kmalloc((sizeof(struct ring_info) *
+ (TG3_RX_RING_SIZE +
+ TG3_RX_JUMBO_RING_SIZE)) +
+ (sizeof(struct tx_ring_info) *
+ TG3_TX_RING_SIZE),
+ GFP_KERNEL);
+ if (!tp->rx_std_buffers)
+ return -ENOMEM;
+
+ memset(tp->rx_std_buffers, 0,
+ (sizeof(struct ring_info) *
+ (TG3_RX_RING_SIZE +
+ TG3_RX_JUMBO_RING_SIZE)) +
+ (sizeof(struct tx_ring_info) *
+ TG3_TX_RING_SIZE));
+
+ tp->rx_jumbo_buffers = &tp->rx_std_buffers[TG3_RX_RING_SIZE];
+ tp->tx_buffers = (struct tx_ring_info *)
+ &tp->rx_jumbo_buffers[TG3_RX_JUMBO_RING_SIZE];
+
+ tp->rx_std = pci_alloc_consistent(tp->pdev, TG3_RX_RING_BYTES,
+ &tp->rx_std_mapping);
+ if (!tp->rx_std)
+ goto err_out;
+
+ tp->rx_jumbo = pci_alloc_consistent(tp->pdev, TG3_RX_JUMBO_RING_BYTES,
+ &tp->rx_jumbo_mapping);
+
+ if (!tp->rx_jumbo)
+ goto err_out;
+
+ tp->rx_rcb = pci_alloc_consistent(tp->pdev, TG3_RX_RCB_RING_BYTES,
+ &tp->rx_rcb_mapping);
+ if (!tp->rx_rcb)
+ goto err_out;
+
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ tp->tx_ring = pci_alloc_consistent(tp->pdev, TG3_TX_RING_BYTES,
+ &tp->tx_desc_mapping);
+ if (!tp->tx_ring)
+ goto err_out;
+ } else {
+ tp->tx_ring = NULL;
+ tp->tx_desc_mapping = 0;
+ }
+
+ tp->hw_status = pci_alloc_consistent(tp->pdev,
+ TG3_HW_STATUS_SIZE,
+ &tp->status_mapping);
+ if (!tp->hw_status)
+ goto err_out;
+
+ tp->hw_stats = pci_alloc_consistent(tp->pdev,
+ sizeof(struct tg3_hw_stats),
+ &tp->stats_mapping);
+ if (!tp->hw_stats)
+ goto err_out;
+
+ memset(tp->hw_status, 0, TG3_HW_STATUS_SIZE);
+ memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
+
+ return 0;
+
+err_out:
+ tg3_free_consistent(tp);
+ return -ENOMEM;
+}
+
+#define MAX_WAIT_CNT 1000
+
+/* To stop a block, clear the enable bit and poll till it
+ * clears. tp->lock is held.
+ */
+static int tg3_stop_block(struct tg3 *tp, unsigned long ofs, u32 enable_bit)
+{
+ unsigned int i;
+ u32 val;
+
+ val = tr32(ofs);
+ val &= ~enable_bit;
+ tw32(ofs, val);
+ tr32(ofs);
+
+ for (i = 0; i < MAX_WAIT_CNT; i++) {
+ udelay(100);
+ val = tr32(ofs);
+ if ((val & enable_bit) == 0)
+ break;
+ }
+
+ if (i == MAX_WAIT_CNT) {
+ printk(KERN_ERR PFX "tg3_stop_block timed out, "
+ "ofs=%lx enable_bit=%x\n",
+ ofs, enable_bit);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/* tp->lock is held. */
+static int tg3_abort_hw(struct tg3 *tp)
+{
+ int i, err;
+
+ tg3_disable_ints(tp);
+
+ tp->rx_mode &= ~RX_MODE_ENABLE;
+ tw32(MAC_RX_MODE, tp->rx_mode);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+
+ err = tg3_stop_block(tp, RCVBDI_MODE, RCVBDI_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVLPC_MODE, RCVLPC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVLSC_MODE, RCVLSC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVDBDI_MODE, RCVDBDI_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVDCC_MODE, RCVDCC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVCC_MODE, RCVCC_MODE_ENABLE);
+
+ err |= tg3_stop_block(tp, SNDBDS_MODE, SNDBDS_MODE_ENABLE);
+ err |= tg3_stop_block(tp, SNDBDI_MODE, SNDBDI_MODE_ENABLE);
+ err |= tg3_stop_block(tp, SNDDATAI_MODE, SNDDATAI_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RDMAC_MODE, RDMAC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, SNDDATAC_MODE, SNDDATAC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, SNDBDC_MODE, SNDBDC_MODE_ENABLE);
+ if (err)
+ goto out;
+
+ tp->mac_mode &= ~MAC_MODE_TDE_ENABLE;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ tp->tx_mode &= ~TX_MODE_ENABLE;
+ tw32(MAC_TX_MODE, tp->tx_mode);
+ tr32(MAC_TX_MODE);
+
+ for (i = 0; i < MAX_WAIT_CNT; i++) {
+ udelay(100);
+ if (!(tr32(MAC_TX_MODE) & TX_MODE_ENABLE))
+ break;
+ }
+ if (i >= MAX_WAIT_CNT) {
+ printk(KERN_ERR PFX "tg3_abort_hw timed out for %s, "
+ "TX_MODE_ENABLE will not clear MAC_TX_MODE=%08x\n",
+ tp->dev->name, tr32(MAC_TX_MODE));
+ return -ENODEV;
+ }
+
+ err = tg3_stop_block(tp, HOSTCC_MODE, HOSTCC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, WDMAC_MODE, WDMAC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, MBFREE_MODE, MBFREE_MODE_ENABLE);
+
+ tw32(FTQ_RESET, 0xffffffff);
+ tw32(FTQ_RESET, 0x00000000);
+
+ err |= tg3_stop_block(tp, BUFMGR_MODE, BUFMGR_MODE_ENABLE);
+ err |= tg3_stop_block(tp, MEMARB_MODE, MEMARB_MODE_ENABLE);
+ if (err)
+ goto out;
+
+ memset(tp->hw_status, 0, TG3_HW_STATUS_SIZE);
+
+out:
+ return err;
+}
+
+/* tp->lock is held. */
+static void tg3_chip_reset(struct tg3 *tp)
+{
+ u32 val;
+
+ /* Force NVRAM to settle.
+ * This deals with a chip bug which can result in EEPROM
+ * corruption.
+ */
+ if (tp->tg3_flags & TG3_FLAG_NVRAM) {
+ int i;
+
+ tw32(NVRAM_SWARB, SWARB_REQ_SET1);
+ for (i = 0; i < 100000; i++) {
+ if (tr32(NVRAM_SWARB) & SWARB_GNT1)
+ break;
+ udelay(10);
+ }
+ }
+
+ tw32(GRC_MISC_CFG, GRC_MISC_CFG_CORECLK_RESET);
+
+	/* Flush PCI posted writes.  The normal MMIO registers
+	 * are inaccessible at this time, so this is the only
+	 * reliable way to do it.  I tried to use indirect
+	 * register read/write but this upset some 5701 variants.
+ */
+ pci_read_config_dword(tp->pdev, PCI_COMMAND, &val);
+
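+	/* Three back-to-back delays; presumably paranoia to give the
+	 * core-clock reset time to settle before touching config space.
+	 */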
+ udelay(40);
+ udelay(40);
+ udelay(40);
+
+ /* Re-enable indirect register accesses. */
+ pci_write_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL,
+ tp->misc_host_ctrl);
+
+ /* Set MAX PCI retry to zero. */
+ val = (PCISTATE_ROM_ENABLE | PCISTATE_ROM_RETRY_ENABLE);
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0 &&
+ (tp->tg3_flags & TG3_FLAG_PCIX_MODE))
+ val |= PCISTATE_RETRY_SAME_DMA;
+ pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, val);
+
+ pci_restore_state(tp->pdev, tp->pci_cfg_state);
+
+ /* Make sure PCI-X relaxed ordering bit is clear. */
+ pci_read_config_dword(tp->pdev, TG3PCI_X_CAPS, &val);
+ val &= ~PCIX_CAPS_RELAXED_ORDERING;
+ pci_write_config_dword(tp->pdev, TG3PCI_X_CAPS, val);
+
+ tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
+
+ tw32(TG3PCI_MISC_HOST_CTRL, tp->misc_host_ctrl);
+}
+
+/* tp->lock is held. */
+static void tg3_stop_fw(struct tg3 *tp)
+{
+ if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) {
+ u32 val;
+ int i;
+
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_PAUSE_FW);
+ val = tr32(GRC_RX_CPU_EVENT);
+ val |= (1 << 14);
+ tw32(GRC_RX_CPU_EVENT, val);
+
+ /* Wait for RX cpu to ACK the event. */
+ for (i = 0; i < 100; i++) {
+ if (!(tr32(GRC_RX_CPU_EVENT) & (1 << 14)))
+ break;
+ udelay(1);
+ }
+ }
+}
+
+/* tp->lock is held. */
+static int tg3_halt(struct tg3 *tp)
+{
+ u32 val;
+ int i;
+
+ tg3_stop_fw(tp);
+ tg3_abort_hw(tp);
+ tg3_chip_reset(tp);
+ tg3_write_mem(tp,
+ NIC_SRAM_FIRMWARE_MBOX,
+ NIC_SRAM_FIRMWARE_MBOX_MAGIC1);
+ for (i = 0; i < 100000; i++) {
+ tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val);
+ if (val == ~NIC_SRAM_FIRMWARE_MBOX_MAGIC1)
+ break;
+ udelay(10);
+ }
+
+ if (i >= 100000) {
+ printk(KERN_ERR PFX "tg3_halt timed out for %s, "
+ "firmware will not restart magic=%08x\n",
+ tp->dev->name, val);
+ return -ENODEV;
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) {
+ if (tp->tg3_flags & TG3_FLAG_WOL_ENABLE)
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_WOL);
+ else
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_UNLOAD);
+ } else
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_SUSPEND);
+
+ return 0;
+}
+
+#define TG3_FW_RELEASE_MAJOR 0x0
+#define TG3_FW_RELASE_MINOR 0x0
+#define TG3_FW_RELEASE_FIX 0x0
+#define TG3_FW_START_ADDR 0x08000000
+#define TG3_FW_TEXT_ADDR 0x08000000
+#define TG3_FW_TEXT_LEN 0x9c0
+#define TG3_FW_RODATA_ADDR 0x080009c0
+#define TG3_FW_RODATA_LEN 0x60
+#define TG3_FW_DATA_ADDR 0x08000a40
+#define TG3_FW_DATA_LEN 0x20
+#define TG3_FW_SBSS_ADDR 0x08000a60
+#define TG3_FW_SBSS_LEN 0xc
+#define TG3_FW_BSS_ADDR 0x08000a70
+#define TG3_FW_BSS_LEN 0x10
+
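+/* The tables below are raw machine code for the on-chip RX/TX
+ * CPUs (the opcodes look like MIPS, e.g. 0x27bdffe0 is
+ * "addiu sp,sp,-32"); tg3_load_5701_a0_firmware_fix() copies them
+ * into the CPU scratch SRAM described by the addresses above.
+ */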
+static u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
+ 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800,
+ 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000018, 0x00000000,
+ 0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100034,
+ 0x0e00021c, 0x00000000, 0x0000000d, 0x00000000, 0x00000000, 0x00000000,
+ 0x27bdffe0, 0x3c1cc000, 0xafbf0018, 0xaf80680c, 0x0e00004c, 0x241b2105,
+ 0x97850000, 0x97870002, 0x9782002c, 0x9783002e, 0x3c040800, 0x248409c0,
+ 0xafa00014, 0x00021400, 0x00621825, 0x00052c00, 0xafa30010, 0x8f860010,
+ 0x00e52825, 0x0e000060, 0x24070102, 0x3c02ac00, 0x34420100, 0x3c03ac01,
+ 0x34630100, 0xaf820490, 0x3c02ffff, 0xaf820494, 0xaf830498, 0xaf82049c,
+ 0x24020001, 0xaf825ce0, 0x0e00003f, 0xaf825d00, 0x0e000140, 0x00000000,
+ 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x2402ffff, 0xaf825404, 0x8f835400,
+ 0x34630400, 0xaf835400, 0xaf825404, 0x3c020800, 0x24420034, 0xaf82541c,
+ 0x03e00008, 0xaf805400, 0x00000000, 0x00000000, 0x3c020800, 0x34423000,
+ 0x3c030800, 0x34633000, 0x3c040800, 0x348437ff, 0x3c010800, 0xac220a64,
+ 0x24020040, 0x3c010800, 0xac220a68, 0x3c010800, 0xac200a60, 0xac600000,
+ 0x24630004, 0x0083102b, 0x5040fffd, 0xac600000, 0x03e00008, 0x00000000,
+ 0x00804821, 0x8faa0010, 0x3c020800, 0x8c420a60, 0x3c040800, 0x8c840a68,
+ 0x8fab0014, 0x24430001, 0x0044102b, 0x3c010800, 0xac230a60, 0x14400003,
+ 0x00004021, 0x3c010800, 0xac200a60, 0x3c020800, 0x8c420a60, 0x3c030800,
+ 0x8c630a64, 0x91240000, 0x00021140, 0x00431021, 0x00481021, 0x25080001,
+ 0xa0440000, 0x29020008, 0x1440fff4, 0x25290001, 0x3c020800, 0x8c420a60,
+ 0x3c030800, 0x8c630a64, 0x8f84680c, 0x00021140, 0x00431021, 0xac440008,
+ 0xac45000c, 0xac460010, 0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0x02000008, 0x00000000, 0x0a0001e3, 0x3c0a0001, 0x0a0001e3, 0x3c0a0002,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x3c0a0007, 0x0a0001e3, 0x3c0a0008, 0x0a0001e3, 0x3c0a0009,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x3c0a000b,
+ 0x0a0001e3, 0x3c0a000c, 0x0a0001e3, 0x3c0a000d, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x3c0a000e, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x3c0a0013, 0x0a0001e3, 0x3c0a0014,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x27bdffe0, 0x00001821, 0x00001021, 0xafbf0018, 0xafb10014, 0xafb00010,
+ 0x3c010800, 0x00220821, 0xac200a70, 0x3c010800, 0x00220821, 0xac200a74,
+ 0x3c010800, 0x00220821, 0xac200a78, 0x24630001, 0x1860fff5, 0x2442000c,
+ 0x24110001, 0x8f906810, 0x32020004, 0x14400005, 0x24040001, 0x3c020800,
+ 0x8c420a78, 0x18400003, 0x00002021, 0x0e000182, 0x00000000, 0x32020001,
+ 0x10400003, 0x00000000, 0x0e000169, 0x00000000, 0x0a000153, 0xaf915028,
+ 0x8fbf0018, 0x8fb10014, 0x8fb00010, 0x03e00008, 0x27bd0020, 0x3c050800,
+ 0x8ca50a70, 0x3c060800, 0x8cc60a80, 0x3c070800, 0x8ce70a78, 0x27bdffe0,
+ 0x3c040800, 0x248409d0, 0xafbf0018, 0xafa00010, 0x0e000060, 0xafa00014,
+ 0x0e00017b, 0x00002021, 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x24020001,
+ 0x8f836810, 0x00821004, 0x00021027, 0x00621824, 0x03e00008, 0xaf836810,
+ 0x27bdffd8, 0xafbf0024, 0x1080002e, 0xafb00020, 0x8f825cec, 0xafa20018,
+ 0x8f825cec, 0x3c100800, 0x26100a78, 0xafa2001c, 0x34028000, 0xaf825cec,
+ 0x8e020000, 0x18400016, 0x00000000, 0x3c020800, 0x94420a74, 0x8fa3001c,
+ 0x000221c0, 0xac830004, 0x8fa2001c, 0x3c010800, 0x0e000201, 0xac220a74,
+ 0x10400005, 0x00000000, 0x8e020000, 0x24420001, 0x0a0001df, 0xae020000,
+ 0x3c020800, 0x8c420a70, 0x00021c02, 0x000321c0, 0x0a0001c5, 0xafa2001c,
+ 0x0e000201, 0x00000000, 0x1040001f, 0x00000000, 0x8e020000, 0x8fa3001c,
+ 0x24420001, 0x3c010800, 0xac230a70, 0x3c010800, 0xac230a74, 0x0a0001df,
+ 0xae020000, 0x3c100800, 0x26100a78, 0x8e020000, 0x18400028, 0x00000000,
+ 0x0e000201, 0x00000000, 0x14400024, 0x00000000, 0x8e020000, 0x3c030800,
+ 0x8c630a70, 0x2442ffff, 0xafa3001c, 0x18400006, 0xae020000, 0x00031402,
+ 0x000221c0, 0x8c820004, 0x3c010800, 0xac220a70, 0x97a2001e, 0x2442ff00,
+ 0x2c420300, 0x1440000b, 0x24024000, 0x3c040800, 0x248409dc, 0xafa00010,
+ 0xafa00014, 0x8fa6001c, 0x24050008, 0x0e000060, 0x00003821, 0x0a0001df,
+ 0x00000000, 0xaf825cf8, 0x3c020800, 0x8c420a40, 0x8fa3001c, 0x24420001,
+ 0xaf835cf8, 0x3c010800, 0xac220a40, 0x8fbf0024, 0x8fb00020, 0x03e00008,
+ 0x27bd0028, 0x27bdffe0, 0x3c040800, 0x248409e8, 0x00002821, 0x00003021,
+ 0x00003821, 0xafbf0018, 0xafa00010, 0x0e000060, 0xafa00014, 0x8fbf0018,
+ 0x03e00008, 0x27bd0020, 0x8f82680c, 0x8f85680c, 0x00021827, 0x0003182b,
+ 0x00031823, 0x00431024, 0x00441021, 0x00a2282b, 0x10a00006, 0x00000000,
+ 0x00401821, 0x8f82680c, 0x0043102b, 0x1440fffd, 0x00000000, 0x03e00008,
+ 0x00000000, 0x3c040800, 0x8c840000, 0x3c030800, 0x8c630a40, 0x0064102b,
+ 0x54400002, 0x00831023, 0x00641023, 0x2c420008, 0x03e00008, 0x38420001,
+ 0x27bdffe0, 0x00802821, 0x3c040800, 0x24840a00, 0x00003021, 0x00003821,
+ 0xafbf0018, 0xafa00010, 0x0e000060, 0xafa00014, 0x0a000216, 0x00000000,
+ 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x00000000, 0x27bdffe0, 0x3c1cc000,
+ 0xafbf0018, 0x0e00004c, 0xaf80680c, 0x3c040800, 0x24840a10, 0x03802821,
+ 0x00003021, 0x00003821, 0xafa00010, 0x0e000060, 0xafa00014, 0x2402ffff,
+ 0xaf825404, 0x3c0200aa, 0x0e000234, 0xaf825434, 0x8fbf0018, 0x03e00008,
+ 0x27bd0020, 0x00000000, 0x00000000, 0x00000000, 0x27bdffe8, 0xafb00010,
+ 0x24100001, 0xafbf0014, 0x3c01c003, 0xac200000, 0x8f826810, 0x30422000,
+ 0x10400003, 0x00000000, 0x0e000246, 0x00000000, 0x0a00023a, 0xaf905428,
+ 0x8fbf0014, 0x8fb00010, 0x03e00008, 0x27bd0018, 0x27bdfff8, 0x8f845d0c,
+ 0x3c0200ff, 0x3c030800, 0x8c630a50, 0x3442fff8, 0x00821024, 0x1043001e,
+ 0x3c0500ff, 0x34a5fff8, 0x3c06c003, 0x3c074000, 0x00851824, 0x8c620010,
+ 0x3c010800, 0xac230a50, 0x30420008, 0x10400005, 0x00871025, 0x8cc20000,
+ 0x24420001, 0xacc20000, 0x00871025, 0xaf825d0c, 0x8fa20000, 0x24420001,
+ 0xafa20000, 0x8fa20000, 0x8fa20000, 0x24420001, 0xafa20000, 0x8fa20000,
+ 0x8f845d0c, 0x3c030800, 0x8c630a50, 0x00851024, 0x1443ffe8, 0x00851824,
+ 0x27bd0008, 0x03e00008, 0x00000000, 0x00000000, 0x00000000
+};
+
+static u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = {
+ 0x35373031, 0x726c7341, 0x00000000, 0x00000000, 0x53774576, 0x656e7430,
+ 0x00000000, 0x726c7045, 0x76656e74, 0x31000000, 0x556e6b6e, 0x45766e74,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
+ 0x00000000, 0x00000000, 0x4d61696e, 0x43707542, 0x00000000, 0x00000000,
+ 0x00000000
+};
+
+#if 0 /* All zeros, don't eat up space with it. */
+u32 tg3FwData[(TG3_FW_DATA_LEN / sizeof(u32)) + 1] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+#endif
+
+#define RX_CPU_SCRATCH_BASE 0x30000
+#define RX_CPU_SCRATCH_SIZE 0x04000
+#define TX_CPU_SCRATCH_BASE 0x34000
+#define TX_CPU_SCRATCH_SIZE 0x04000
+
+/* tp->lock is held. */
+static int tg3_reset_cpu(struct tg3 *tp, u32 offset)
+{
+ int i;
+
+ tw32(offset + CPU_STATE, 0xffffffff);
+ tw32(offset + CPU_MODE, CPU_MODE_RESET);
+ if (offset == RX_CPU_BASE) {
+ for (i = 0; i < 10000; i++)
+ if (!(tr32(offset + CPU_MODE) & CPU_MODE_RESET))
+ break;
+ tw32(offset + CPU_STATE, 0xffffffff);
+ tw32(offset + CPU_MODE, CPU_MODE_RESET);
+ tr32(offset + CPU_MODE);
+ udelay(10);
+ } else {
+ for (i = 0; i < 10000; i++) {
+ if (!(tr32(offset + CPU_MODE) & CPU_MODE_RESET))
+ break;
+ tw32(offset + CPU_STATE, 0xffffffff);
+ tw32(offset + CPU_MODE, CPU_MODE_RESET);
+ tr32(offset + CPU_MODE);
+ udelay(10);
+ }
+ }
+
+ if (i >= 10000) {
+ printk(KERN_ERR PFX "tg3_reset_cpu timed out for %s, "
+		       "%s CPU\n",
+ tp->dev->name,
+ (offset == RX_CPU_BASE ? "RX" : "TX"));
+ return -ENODEV;
+ }
+ return 0;
+}
+
+struct fw_info {
+ unsigned int text_base;
+ unsigned int text_len;
+ u32 *text_data;
+ unsigned int rodata_base;
+ unsigned int rodata_len;
+ u32 *rodata_data;
+ unsigned int data_base;
+ unsigned int data_len;
+ u32 *data_data;
+};
+
+/* tp->lock is held. */
+static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_base,
+ int cpu_scratch_size, struct fw_info *info)
+{
+ int err, i;
+ u32 orig_tg3_flags = tp->tg3_flags;
+
+ /* Force use of PCI config space for indirect register
+ * write calls.
+ */
+ tp->tg3_flags |= TG3_FLAG_PCIX_TARGET_HWBUG;
+
+ err = tg3_reset_cpu(tp, cpu_base);
+ if (err)
+ goto out;
+
+ for (i = 0; i < cpu_scratch_size; i += sizeof(u32))
+ tg3_write_indirect_reg32(tp, cpu_scratch_base + i, 0);
+ tw32(cpu_base + CPU_STATE, 0xffffffff);
+ tw32(cpu_base + CPU_MODE, tr32(cpu_base+CPU_MODE)|CPU_MODE_HALT);
+ for (i = 0; i < (info->text_len / sizeof(u32)); i++)
+ tg3_write_indirect_reg32(tp, (cpu_scratch_base +
+ (info->text_base & 0xffff) +
+ (i * sizeof(u32))),
+ (info->text_data ?
+ info->text_data[i] : 0));
+ for (i = 0; i < (info->rodata_len / sizeof(u32)); i++)
+ tg3_write_indirect_reg32(tp, (cpu_scratch_base +
+ (info->rodata_base & 0xffff) +
+ (i * sizeof(u32))),
+ (info->rodata_data ?
+ info->rodata_data[i] : 0));
+ for (i = 0; i < (info->data_len / sizeof(u32)); i++)
+ tg3_write_indirect_reg32(tp, (cpu_scratch_base +
+ (info->data_base & 0xffff) +
+ (i * sizeof(u32))),
+ (info->data_data ?
+ info->data_data[i] : 0));
+
+ err = 0;
+
+out:
+ tp->tg3_flags = orig_tg3_flags;
+ return err;
+}
+
+/* tp->lock is held. */
+static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp)
+{
+ struct fw_info info;
+ int err, i;
+
+ info.text_base = TG3_FW_TEXT_ADDR;
+ info.text_len = TG3_FW_TEXT_LEN;
+ info.text_data = &tg3FwText[0];
+ info.rodata_base = TG3_FW_RODATA_ADDR;
+ info.rodata_len = TG3_FW_RODATA_LEN;
+ info.rodata_data = &tg3FwRodata[0];
+ info.data_base = TG3_FW_DATA_ADDR;
+ info.data_len = TG3_FW_DATA_LEN;
+ info.data_data = NULL;
+
+ err = tg3_load_firmware_cpu(tp, RX_CPU_BASE,
+ RX_CPU_SCRATCH_BASE, RX_CPU_SCRATCH_SIZE,
+ &info);
+ if (err)
+ return err;
+
+ err = tg3_load_firmware_cpu(tp, TX_CPU_BASE,
+ TX_CPU_SCRATCH_BASE, TX_CPU_SCRATCH_SIZE,
+ &info);
+ if (err)
+ return err;
+
+ /* Now startup only the RX cpu. */
+ tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(RX_CPU_BASE + CPU_PC, TG3_FW_TEXT_ADDR);
+
+ /* Flush posted writes. */
+ tr32(RX_CPU_BASE + CPU_PC);
+ for (i = 0; i < 5; i++) {
+ if (tr32(RX_CPU_BASE + CPU_PC) == TG3_FW_TEXT_ADDR)
+ break;
+ tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(RX_CPU_BASE + CPU_MODE, CPU_MODE_HALT);
+ tw32(RX_CPU_BASE + CPU_PC, TG3_FW_TEXT_ADDR);
+
+ /* Flush posted writes. */
+ tr32(RX_CPU_BASE + CPU_PC);
+
+ udelay(1000);
+ }
+ if (i >= 5) {
+ printk(KERN_ERR PFX "tg3_load_firmware fails for %s "
+		       "to set RX CPU PC: is %08x, should be %08x\n",
+ tp->dev->name, tr32(RX_CPU_BASE + CPU_PC),
+ TG3_FW_TEXT_ADDR);
+ return -ENODEV;
+ }
+ tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(RX_CPU_BASE + CPU_MODE, 0x00000000);
+
+ /* Flush posted writes. */
+ tr32(RX_CPU_BASE + CPU_MODE);
+
+ return 0;
+}
+
+#if TG3_DO_TSO != 0
+
+#define TG3_TSO_FW_RELEASE_MAJOR 0x1
+#define TG3_TSO_FW_RELASE_MINOR 0x8
+#define TG3_TSO_FW_RELEASE_FIX 0x0
+#define TG3_TSO_FW_START_ADDR 0x08000000
+#define TG3_TSO_FW_TEXT_ADDR 0x08000000
+#define TG3_TSO_FW_TEXT_LEN 0x1650
+#define TG3_TSO_FW_RODATA_ADDR 0x08001650
+#define TG3_TSO_FW_RODATA_LEN 0x30
+#define TG3_TSO_FW_DATA_ADDR 0x080016a0
+#define TG3_TSO_FW_DATA_LEN 0x20
+#define TG3_TSO_FW_SBSS_ADDR 0x080016c0
+#define TG3_TSO_FW_SBSS_LEN 0x14
+#define TG3_TSO_FW_BSS_ADDR 0x080016e0
+#define TG3_TSO_FW_BSS_LEN 0x8fc
+
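+/* TSO firmware image for the on-chip TX CPU; the arrays below are the
+ * raw instruction and data words loaded by tg3_load_tso_firmware().
+ */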
+static u32 tg3TsoFwText[] = {
+ 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800,
+ 0x37bd4000, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000010, 0x00000000,
+ 0x0000000d, 0x00000000, 0x00000000, 0x00000000, 0x27bdffe0, 0x3c1bc000,
+ 0xafbf0018, 0x0e000058, 0xaf60680c, 0x3c040800, 0x24841650, 0x03602821,
+ 0x24060001, 0x24070004, 0xafa00010, 0x0e00006c, 0xafa00014, 0x8f625c50,
+ 0x34420001, 0xaf625c50, 0x8f625c90, 0x34420001, 0xaf625c90, 0x2402ffff,
+ 0x0e000098, 0xaf625404, 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x00000000,
+ 0x00000000, 0x00000000, 0x24030b60, 0x24050fff, 0xac000b50, 0x00002021,
+ 0xac640000, 0x24630004, 0x0065102b, 0x1440fffc, 0x24840001, 0x24030b60,
+ 0x0065102b, 0x10400011, 0x00002021, 0x24090b54, 0x3c06dead, 0x34c6beef,
+ 0x24080b58, 0x24070b5c, 0x8c620000, 0x50440006, 0x24630004, 0xad260000,
+ 0x8c620000, 0xace40000, 0xad020000, 0x24630004, 0x0065102b, 0x1440fff6,
+ 0x24840001, 0x03e00008, 0x00000000, 0x27bdfff8, 0x18800009, 0x00002821,
+ 0x8f63680c, 0x8f62680c, 0x1043fffe, 0x00000000, 0x24a50001, 0x00a4102a,
+ 0x1440fff9, 0x00000000, 0x03e00008, 0x27bd0008, 0x3c020800, 0x34423000,
+ 0x3c030800, 0x34633000, 0x3c040800, 0x348437ff, 0x3c010800, 0xac2216c4,
+ 0x24020040, 0x3c010800, 0xac2216c8, 0x3c010800, 0xac2016c0, 0xac600000,
+ 0x24630004, 0x0083102b, 0x5040fffd, 0xac600000, 0x03e00008, 0x00000000,
+ 0x00804821, 0x8faa0010, 0x3c020800, 0x8c4216c0, 0x3c040800, 0x8c8416c8,
+ 0x8fab0014, 0x24430001, 0x0044102b, 0x3c010800, 0xac2316c0, 0x14400003,
+ 0x00004021, 0x3c010800, 0xac2016c0, 0x3c020800, 0x8c4216c0, 0x3c030800,
+ 0x8c6316c4, 0x91240000, 0x00021140, 0x00431021, 0x00481021, 0x25080001,
+ 0xa0440000, 0x29020008, 0x1440fff4, 0x25290001, 0x3c020800, 0x8c4216c0,
+ 0x3c030800, 0x8c6316c4, 0x8f64680c, 0x00021140, 0x00431021, 0xac440008,
+ 0xac45000c, 0xac460010, 0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c,
+ 0x00000000, 0x00000000, 0x27bdffe0, 0xafbf0018, 0xafb10014, 0x0e0000b6,
+ 0xafb00010, 0x24110001, 0x8f706820, 0x32020100, 0x10400003, 0x00000000,
+ 0x0e000127, 0x00000000, 0x8f706820, 0x32022000, 0x10400004, 0x32020001,
+ 0x0e00025a, 0x24040001, 0x32020001, 0x10400003, 0x00000000, 0x0e0000e6,
+ 0x00000000, 0x0a00009e, 0xaf715028, 0x8fbf0018, 0x8fb10014, 0x8fb00010,
+ 0x03e00008, 0x27bd0020, 0x27bdffe0, 0x3c040800, 0x24841660, 0x00002821,
+ 0x00003021, 0x00003821, 0xafbf0018, 0xafa00010, 0x0e00006c, 0xafa00014,
+ 0x3c010800, 0xa4201fb8, 0x3c010800, 0xa02016f8, 0x3c010800, 0xac2016fc,
+ 0x3c010800, 0xac201700, 0x3c010800, 0xac201704, 0x3c010800, 0xac20170c,
+ 0x3c010800, 0xac201718, 0x3c010800, 0xac20171c, 0x8f624434, 0x3c010800,
+ 0xac2216e8, 0x8f624438, 0x3c010800, 0xac2216ec, 0x8f624410, 0x3c010800,
+ 0xac2016e0, 0x3c010800, 0xac2016e4, 0x3c010800, 0xac201fc0, 0x3c010800,
+ 0xac201f68, 0x3c010800, 0xac201f6c, 0x3c010800, 0xac2216f0, 0x8fbf0018,
+ 0x03e00008, 0x27bd0020, 0x27bdffe0, 0x3c040800, 0x2484166c, 0x00002821,
+ 0x00003021, 0x00003821, 0xafbf0018, 0xafa00010, 0x0e00006c, 0xafa00014,
+ 0x3c040800, 0x24841660, 0x00002821, 0x00003021, 0x00003821, 0xafa00010,
+ 0x0e00006c, 0xafa00014, 0x3c010800, 0xa4201fb8, 0x3c010800, 0xa02016f8,
+ 0x3c010800, 0xac2016fc, 0x3c010800, 0xac201700, 0x3c010800, 0xac201704,
+ 0x3c010800, 0xac20170c, 0x3c010800, 0xac201718, 0x3c010800, 0xac20171c,
+ 0x8f624434, 0x3c010800, 0xac2216e8, 0x8f624438, 0x3c010800, 0xac2216ec,
+ 0x8f624410, 0x3c010800, 0xac2016e0, 0x3c010800, 0xac2016e4, 0x3c010800,
+ 0xac201fc0, 0x3c010800, 0xac201f68, 0x3c010800, 0xac201f6c, 0x3c010800,
+ 0xac2216f0, 0x0e000120, 0x00002021, 0x8fbf0018, 0x03e00008, 0x27bd0020,
+ 0x24020001, 0x8f636820, 0x00821004, 0x00021027, 0x00621824, 0x03e00008,
+ 0xaf636820, 0x27bdffd0, 0x3c0300ff, 0xafbf002c, 0xafb60028, 0xafb50024,
+ 0xafb40020, 0xafb3001c, 0xafb20018, 0xafb10014, 0xafb00010, 0x8f665c5c,
+ 0x3c040800, 0x2484171c, 0x8c820000, 0x3463fff8, 0x14460005, 0x00c38824,
+ 0x3c020800, 0x904216f8, 0x14400115, 0x00000000, 0x00111902, 0x306300ff,
+ 0x30c20003, 0x000211c0, 0x00623825, 0x00e02821, 0x00061602, 0xac860000,
+ 0x3c030800, 0x906316f8, 0x3044000f, 0x1460002b, 0x00804021, 0x24020001,
+ 0x3c010800, 0xa02216f8, 0x00071100, 0x00821025, 0x3c010800, 0xac2016fc,
+ 0x3c010800, 0xac201700, 0x3c010800, 0xac201704, 0x3c010800, 0xac20170c,
+ 0x3c010800, 0xac201718, 0x3c010800, 0xac201710, 0x3c010800, 0xac201714,
+ 0x3c010800, 0xa4221fb8, 0x9623000c, 0x30628000, 0x10400008, 0x30627fff,
+ 0x2442003e, 0x3c010800, 0xa42216f6, 0x24020001, 0x3c010800, 0x0a00016e,
+ 0xac221fd4, 0x24620036, 0x3c010800, 0xa42216f6, 0x3c010800, 0xac201fd4,
+ 0x3c010800, 0xac201fd0, 0x3c010800, 0x0a000176, 0xac201fd8, 0x9622000c,
+ 0x3c010800, 0xa4221fcc, 0x3c040800, 0x248416fc, 0x8c820000, 0x00021100,
+ 0x3c010800, 0x00220821, 0xac311728, 0x8c820000, 0x00021100, 0x3c010800,
+ 0x00220821, 0xac26172c, 0x8c820000, 0x24a30001, 0x306701ff, 0x00021100,
+ 0x3c010800, 0x00220821, 0xac271730, 0x8c820000, 0x00021100, 0x3c010800,
+ 0x00220821, 0xac281734, 0x96230008, 0x3c020800, 0x8c42170c, 0x00432821,
+ 0x3c010800, 0xac25170c, 0x9622000a, 0x30420004, 0x14400019, 0x00071100,
+ 0x3c02c000, 0x00c21825, 0xaf635c5c, 0x8f625c50, 0x30420002, 0x1440fffc,
+ 0x00000000, 0x8f630c14, 0x3063000f, 0x2c620002, 0x1440001e, 0x00000000,
+ 0x8f630c14, 0x3c020800, 0x8c4216b4, 0x3063000f, 0x24420001, 0x3c010800,
+ 0xac2216b4, 0x2c620002, 0x1040fff7, 0x00000000, 0x0a0001c1, 0x00000000,
+ 0x3c030800, 0x8c6316e0, 0x3c040800, 0x948416f4, 0x01021025, 0x3c010800,
+ 0xa4221fba, 0x24020001, 0x3c010800, 0xac221718, 0x24630001, 0x0085202a,
+ 0x3c010800, 0x10800003, 0xac2316e0, 0x3c010800, 0xa42516f4, 0x3c030800,
+ 0x246316fc, 0x8c620000, 0x24420001, 0xac620000, 0x28420080, 0x14400005,
+ 0x24020001, 0x0e0002df, 0x24040002, 0x0a000250, 0x00000000, 0x3c030800,
+ 0x906316f8, 0x1462007c, 0x24020003, 0x3c160800, 0x96d616f6, 0x3c050800,
+ 0x8ca5170c, 0x32c4ffff, 0x00a4102a, 0x14400078, 0x00000000, 0x3c020800,
+ 0x8c421718, 0x10400005, 0x32c2ffff, 0x14a40003, 0x00000000, 0x3c010800,
+ 0xac231fd0, 0x10400062, 0x00009021, 0x0040a021, 0x3c150800, 0x26b51700,
+ 0x26b30010, 0x8ea20000, 0x00028100, 0x3c110800, 0x02308821, 0x0e0002e1,
+ 0x8e311728, 0x00403021, 0x10c00059, 0x00000000, 0x9628000a, 0x31020040,
+ 0x10400004, 0x2407180c, 0x8e22000c, 0x2407188c, 0xacc20018, 0x31021000,
+ 0x10400004, 0x34e32000, 0x00081040, 0x3042c000, 0x00623825, 0x3c030800,
+ 0x00701821, 0x8c631730, 0x3c020800, 0x00501021, 0x8c421734, 0x00031d00,
+ 0x00021400, 0x00621825, 0xacc30014, 0x8ea30004, 0x96220008, 0x00432023,
+ 0x3242ffff, 0x3083ffff, 0x00431021, 0x0282102a, 0x14400002, 0x02d22823,
+ 0x00802821, 0x8e620000, 0x30a4ffff, 0x00441021, 0xae620000, 0x8e220000,
+ 0xacc20000, 0x8e220004, 0x8e63fff4, 0x00431021, 0xacc20004, 0xa4c5000e,
+ 0x8e62fff4, 0x00441021, 0xae62fff4, 0x96230008, 0x0043102a, 0x14400005,
+ 0x02459021, 0x8e62fff0, 0xae60fff4, 0x24420001, 0xae62fff0, 0xacc00008,
+ 0x3242ffff, 0x14540008, 0x24020305, 0x31020080, 0x54400001, 0x34e70010,
+ 0x24020905, 0xa4c2000c, 0x0a000233, 0x34e70020, 0xa4c2000c, 0x30e2ffff,
+ 0xacc20010, 0x3c020800, 0x8c421fd0, 0x10400003, 0x3c024b65, 0x0a00023d,
+ 0x34427654, 0x3c02b49a, 0x344289ab, 0xacc2001c, 0x0e000560, 0x00c02021,
+ 0x3242ffff, 0x0054102b, 0x1440ffa4, 0x00000000, 0x24020002, 0x3c010800,
+ 0x0a000250, 0xa02216f8, 0x8ea208bc, 0x24420001, 0x0a000250, 0xaea208bc,
+ 0x14620003, 0x00000000, 0x0e000450, 0x00000000, 0x8fbf002c, 0x8fb60028,
+ 0x8fb50024, 0x8fb40020, 0x8fb3001c, 0x8fb20018, 0x8fb10014, 0x8fb00010,
+ 0x03e00008, 0x27bd0030, 0x27bdffd8, 0xafb3001c, 0x00809821, 0xafbf0020,
+ 0xafb20018, 0xafb10014, 0xafb00010, 0x8f725c9c, 0x3c0200ff, 0x3442fff8,
+ 0x3c040800, 0x24841714, 0x02428824, 0x9623000e, 0x8c820000, 0x00431021,
+ 0xac820000, 0x8e220010, 0x30420020, 0x14400011, 0x00000000, 0x0e0002f7,
+ 0x02202021, 0x3c02c000, 0x02421825, 0xaf635c9c, 0x8f625c90, 0x30420002,
+ 0x10400061, 0x00000000, 0xaf635c9c, 0x8f625c90, 0x30420002, 0x1040005c,
+ 0x00000000, 0x0a000278, 0x00000000, 0x8e220008, 0x00021c02, 0x000321c0,
+ 0x3042ffff, 0x3c030800, 0x906316f8, 0x000229c0, 0x24020002, 0x14620003,
+ 0x3c034b65, 0x0a000290, 0x00008021, 0x8e22001c, 0x34637654, 0x10430002,
+ 0x24100002, 0x24100001, 0x0e000300, 0x02003021, 0x24020003, 0x3c010800,
+ 0xa02216f8, 0x24020002, 0x1202000a, 0x24020001, 0x3c030800, 0x8c631fd0,
+ 0x10620006, 0x00000000, 0x3c020800, 0x94421fb8, 0x00021400, 0x0a0002cd,
+ 0xae220014, 0x3c040800, 0x24841fba, 0x94820000, 0x00021400, 0xae220014,
+ 0x3c020800, 0x8c42171c, 0x3c03c000, 0x3c010800, 0xa02016f8, 0x00431025,
+ 0xaf625c5c, 0x8f625c50, 0x30420002, 0x10400009, 0x00000000, 0x2484f762,
+ 0x8c820000, 0x00431025, 0xaf625c5c, 0x8f625c50, 0x30420002, 0x1440fffa,
+ 0x00000000, 0x3c020800, 0x244216e4, 0x8c430000, 0x24630001, 0xac430000,
+ 0x8f630c14, 0x3063000f, 0x2c620002, 0x1440000b, 0x00009821, 0x8f630c14,
+ 0x3c020800, 0x8c4216b4, 0x3063000f, 0x24420001, 0x3c010800, 0xac2216b4,
+ 0x2c620002, 0x1040fff7, 0x00009821, 0x3c024000, 0x02421825, 0xaf635c9c,
+ 0x8f625c90, 0x30420002, 0x1440fffc, 0x00000000, 0x12600003, 0x00000000,
+ 0x0e000450, 0x00000000, 0x8fbf0020, 0x8fb3001c, 0x8fb20018, 0x8fb10014,
+ 0x8fb00010, 0x03e00008, 0x27bd0028, 0x0a0002df, 0x00000000, 0x8f634450,
+ 0x3c040800, 0x248416e8, 0x8c820000, 0x00031c02, 0x0043102b, 0x14400007,
+ 0x3c038000, 0x8c840004, 0x8f624450, 0x00021c02, 0x0083102b, 0x1040fffc,
+ 0x3c038000, 0xaf634444, 0x8f624444, 0x00431024, 0x1440fffd, 0x00000000,
+ 0x8f624448, 0x03e00008, 0x3042ffff, 0x3c024000, 0x00822025, 0xaf645c38,
+ 0x8f625c30, 0x30420002, 0x1440fffc, 0x00000000, 0x03e00008, 0x00000000,
+ 0x27bdffe0, 0x00805021, 0x14c00017, 0x254c0008, 0x3c020800, 0x8c421fd4,
+ 0x1040000a, 0x2402003e, 0x3c010800, 0xa4221fb0, 0x24020016, 0x3c010800,
+ 0xa4221fb2, 0x2402002a, 0x3c010800, 0x0a00031a, 0xa4221fb4, 0x95420014,
+ 0x3c010800, 0xa4221fb0, 0x8d430010, 0x00031402, 0x3c010800, 0xa4221fb2,
+ 0x3c010800, 0xa4231fb4, 0x3c040800, 0x94841fb4, 0x3c030800, 0x94631fb2,
+ 0x958d0006, 0x3c020800, 0x94421fb0, 0x00832023, 0x01a27023, 0x3065ffff,
+ 0x24a20028, 0x01824021, 0x3082ffff, 0x14c0001a, 0x01025821, 0x9562000c,
+ 0x3042003f, 0x3c010800, 0xa4221fb6, 0x95620004, 0x95630006, 0x3c010800,
+ 0xac201fc4, 0x3c010800, 0xac201fc8, 0x00021400, 0x00431025, 0x3c010800,
+ 0xac221720, 0x95020004, 0x3c010800, 0xa4221724, 0x95030002, 0x01a51023,
+ 0x0043102a, 0x10400010, 0x24020001, 0x3c010800, 0x0a00034e, 0xac221fd8,
+ 0x3c030800, 0x8c631fc8, 0x3c020800, 0x94421724, 0x00431021, 0xa5020004,
+ 0x3c020800, 0x94421720, 0xa5620004, 0x3c020800, 0x8c421720, 0xa5620006,
+ 0x3c020800, 0x8c421fd0, 0x3c070800, 0x8ce71fc4, 0x3c050800, 0x144000c7,
+ 0x8ca51fc8, 0x3c020800, 0x94421724, 0x00451821, 0x3063ffff, 0x0062182b,
+ 0x24020002, 0x10c2000d, 0x00a32823, 0x3c020800, 0x94421fb6, 0x30420009,
+ 0x10400008, 0x00000000, 0x9562000c, 0x3042fff6, 0xa562000c, 0x3c020800,
+ 0x94421fb6, 0x30420009, 0x00e23823, 0x3c020800, 0x8c421fd8, 0x1040004b,
+ 0x24020002, 0x01003021, 0x3c020800, 0x94421fb2, 0x00003821, 0xa500000a,
+ 0x01a21023, 0xa5020002, 0x3082ffff, 0x00021042, 0x18400008, 0x00002821,
+ 0x00401821, 0x94c20000, 0x24e70001, 0x00a22821, 0x00e3102a, 0x1440fffb,
+ 0x24c60002, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402, 0x00a22821,
+ 0x00a04821, 0x00051027, 0xa502000a, 0x00002821, 0x2506000c, 0x00003821,
+ 0x94c20000, 0x24e70001, 0x00a22821, 0x2ce20004, 0x1440fffb, 0x24c60002,
+ 0x95020002, 0x00003821, 0x91030009, 0x00442023, 0x01603021, 0x3082ffff,
+ 0xa4c00010, 0x00621821, 0x00021042, 0x18400010, 0x00a32821, 0x00404021,
+ 0x94c20000, 0x24c60002, 0x00a22821, 0x30c2007f, 0x14400006, 0x24e70001,
+ 0x8d430000, 0x3c02007f, 0x3442ff80, 0x00625024, 0x25460008, 0x00e8102a,
+ 0x1440fff3, 0x00000000, 0x30820001, 0x10400005, 0x00051c02, 0xa0c00001,
+ 0x94c20000, 0x00a22821, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402,
+ 0x00a22821, 0x0a000415, 0x30a5ffff, 0x14c20063, 0x00000000, 0x3c090800,
+ 0x95291fb2, 0x95030002, 0x01a91023, 0x1062005d, 0x01003021, 0x00003821,
+ 0x00002821, 0x01a91023, 0xa5020002, 0x3082ffff, 0x00021042, 0x18400008,
+ 0xa500000a, 0x00401821, 0x94c20000, 0x24e70001, 0x00a22821, 0x00e3102a,
+ 0x1440fffb, 0x24c60002, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402,
+ 0x00a22821, 0x00a04821, 0x00051027, 0xa502000a, 0x00002821, 0x2506000c,
+ 0x00003821, 0x94c20000, 0x24e70001, 0x00a22821, 0x2ce20004, 0x1440fffb,
+ 0x24c60002, 0x95020002, 0x00003821, 0x91030009, 0x00442023, 0x01603021,
+ 0x3082ffff, 0xa4c00010, 0x3c040800, 0x94841fb4, 0x00621821, 0x00a32821,
+ 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051c02, 0x3c020800, 0x94421fb0,
+ 0x00a34021, 0x00441023, 0x00021fc2, 0x00431021, 0x00021043, 0x18400010,
+ 0x00002821, 0x00402021, 0x94c20000, 0x24c60002, 0x00a22821, 0x30c2007f,
+ 0x14400006, 0x24e70001, 0x8d430000, 0x3c02007f, 0x3442ff80, 0x00625024,
+ 0x25460008, 0x00e4102a, 0x1440fff3, 0x00000000, 0x3c020800, 0x94421fcc,
+ 0x00a22821, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402, 0x00a22821,
+ 0x3102ffff, 0x00a22821, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402,
+ 0x00a22821, 0x00a02021, 0x00051027, 0xa5620010, 0xad800014, 0x0a000435,
+ 0xad800000, 0x8d830010, 0x00602021, 0x10a00007, 0x00034c02, 0x01252821,
+ 0x00051402, 0x30a3ffff, 0x00432821, 0x00051402, 0x00a24821, 0x00091027,
+ 0xa502000a, 0x3c030800, 0x94631fb4, 0x3082ffff, 0x01a21021, 0x00432823,
+ 0x00a72821, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402, 0x00a22821,
+ 0x00a02021, 0x00051027, 0xa5620010, 0x3082ffff, 0x00091c00, 0x00431025,
+ 0xad820010, 0x3c020800, 0x8c421fd4, 0x10400002, 0x25a2fff2, 0xa5820034,
+ 0x3c020800, 0x8c421fc8, 0x3c030800, 0x8c631720, 0x24420001, 0x3c010800,
+ 0xac221fc8, 0x3c020800, 0x8c421fc4, 0x31c4ffff, 0x00641821, 0x3c010800,
+ 0xac231720, 0x00441021, 0x3c010800, 0xac221fc4, 0x03e00008, 0x27bd0020,
+ 0x27bdffc8, 0x3c040800, 0x248416f8, 0xafbf0034, 0xafbe0030, 0xafb7002c,
+ 0xafb60028, 0xafb50024, 0xafb40020, 0xafb3001c, 0xafb20018, 0xafb10014,
+ 0xafb00010, 0x90830000, 0x24020003, 0x146200f4, 0x00000000, 0x3c020800,
+ 0x8c421710, 0x3c030800, 0x8c63170c, 0x3c1e0800, 0x97de16f6, 0x0043102a,
+ 0x104000eb, 0x3c168000, 0x249708c4, 0x33d5ffff, 0x24920018, 0x3c020800,
+ 0x8c421718, 0x104000e4, 0x00000000, 0x3c140800, 0x96941fb0, 0x3282ffff,
+ 0x104000d6, 0x00008021, 0x00409821, 0x00008821, 0x8f634450, 0x3c020800,
+ 0x8c4216e8, 0x00031c02, 0x0043102b, 0x14400008, 0x00000000, 0x3c040800,
+ 0x8c8416ec, 0x8f624450, 0x00021c02, 0x0083102b, 0x1040fffc, 0x00000000,
+ 0xaf764444, 0x8f624444, 0x00561024, 0x10400006, 0x00000000, 0x3c038000,
+ 0x8f624444, 0x00431024, 0x1440fffd, 0x00000000, 0x8f624448, 0x3046ffff,
+ 0x10c0005f, 0x00000000, 0x3c090800, 0x01314821, 0x8d291728, 0x9528000a,
+ 0x31020040, 0x10400004, 0x2407180c, 0x8d22000c, 0x2407188c, 0xacc20018,
+ 0x31021000, 0x10400004, 0x34e32000, 0x00081040, 0x3042c000, 0x00623825,
+ 0x31020080, 0x54400001, 0x34e70010, 0x3c020800, 0x00511021, 0x8c421730,
+ 0x3c030800, 0x00711821, 0x8c631734, 0x00021500, 0x00031c00, 0x00431025,
+ 0xacc20014, 0x95240008, 0x3202ffff, 0x00821021, 0x0262102a, 0x14400002,
+ 0x02902823, 0x00802821, 0x8d220000, 0x02058021, 0xacc20000, 0x8d220004,
+ 0x00c02021, 0x26310010, 0xac820004, 0x30e2ffff, 0xac800008, 0xa485000e,
+ 0xac820010, 0x24020305, 0x0e000560, 0xa482000c, 0x3202ffff, 0x0053102b,
+ 0x1440ffaf, 0x3202ffff, 0x0a00054c, 0x00000000, 0x8e420000, 0x8e43fffc,
+ 0x0043102a, 0x10400084, 0x00000000, 0x8e45fff0, 0x8f644450, 0x3c030800,
+ 0x8c6316e8, 0x00051100, 0x3c090800, 0x01224821, 0x8d291728, 0x00041402,
+ 0x0062182b, 0x14600008, 0x00000000, 0x3c030800, 0x8c6316ec, 0x8f624450,
+ 0x00021402, 0x0062102b, 0x1040fffc, 0x00000000, 0xaf764444, 0x8f624444,
+ 0x00561024, 0x10400006, 0x00000000, 0x3c038000, 0x8f624444, 0x00431024,
+ 0x1440fffd, 0x00000000, 0x8f624448, 0x3046ffff, 0x14c00005, 0x00000000,
+ 0x8ee20000, 0x24420001, 0x0a000554, 0xaee20000, 0x9528000a, 0x31020040,
+ 0x10400004, 0x2407180c, 0x8d22000c, 0x2407188c, 0xacc20018, 0x31021000,
+ 0x10400004, 0x34e32000, 0x00081040, 0x3042c000, 0x00623825, 0x00051900,
+ 0x3c020800, 0x00431021, 0x8c421730, 0x3c010800, 0x00230821, 0x8c231734,
+ 0x00021500, 0x00031c00, 0x00431025, 0xacc20014, 0x3c030800, 0x8c631704,
+ 0x95220008, 0x00432023, 0x3202ffff, 0x3083ffff, 0x00431021, 0x02a2102a,
+ 0x14400002, 0x03d02823, 0x00802821, 0x8e420000, 0x30a4ffff, 0x00441021,
+ 0xae420000, 0xa4c5000e, 0x8d220000, 0xacc20000, 0x8d220004, 0x8e43fff4,
+ 0x00431021, 0xacc20004, 0x8e43fff4, 0x95220008, 0x00641821, 0x0062102a,
+ 0x14400006, 0x02058021, 0x8e42fff0, 0xae40fff4, 0x24420001, 0x0a000530,
+ 0xae42fff0, 0xae43fff4, 0xacc00008, 0x3202ffff, 0x10550003, 0x31020004,
+ 0x10400006, 0x24020305, 0x31020080, 0x54400001, 0x34e70010, 0x34e70020,
+ 0x24020905, 0xa4c2000c, 0x30e2ffff, 0xacc20010, 0x3c030800, 0x8c63170c,
+ 0x3c020800, 0x8c421710, 0x54620004, 0x3c02b49a, 0x3c024b65, 0x0a000548,
+ 0x34427654, 0x344289ab, 0xacc2001c, 0x0e000560, 0x00c02021, 0x3202ffff,
+ 0x0055102b, 0x1440ff7e, 0x00000000, 0x8e420000, 0x8e43fffc, 0x0043102a,
+ 0x1440ff1a, 0x00000000, 0x8fbf0034, 0x8fbe0030, 0x8fb7002c, 0x8fb60028,
+ 0x8fb50024, 0x8fb40020, 0x8fb3001c, 0x8fb20018, 0x8fb10014, 0x8fb00010,
+ 0x03e00008, 0x27bd0038, 0x27bdffe8, 0xafbf0014, 0xafb00010, 0x8f624450,
+ 0x8f634410, 0x0a00056f, 0x00808021, 0x8f626820, 0x30422000, 0x10400003,
+ 0x00000000, 0x0e00025a, 0x00002021, 0x8f624450, 0x8f634410, 0x3042ffff,
+ 0x0043102b, 0x1440fff5, 0x00000000, 0x8f630c14, 0x3063000f, 0x2c620002,
+ 0x1440000b, 0x00000000, 0x8f630c14, 0x3c020800, 0x8c4216b4, 0x3063000f,
+ 0x24420001, 0x3c010800, 0xac2216b4, 0x2c620002, 0x1040fff7, 0x00000000,
+ 0xaf705c18, 0x8f625c10, 0x30420002, 0x10400009, 0x00000000, 0x8f626820,
+ 0x30422000, 0x1040fff8, 0x00000000, 0x0e00025a, 0x00002021, 0x0a000582,
+ 0x00000000, 0x8fbf0014, 0x8fb00010, 0x03e00008, 0x27bd0018, 0x00000000,
+ 0x00000000
+};
+
+static u32 tg3TsoFwRodata[] = {
+ 0x4d61696e, 0x43707542, 0x00000000, 0x00000000, 0x74637073, 0x6567496e,
+ 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000
+};
+
+#if 0 /* All zeros, don't eat up space with it. */
+u32 tg3TsoFwData[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000
+};
+#endif
+
+/* tp->lock is held. */
+static int tg3_load_tso_firmware(struct tg3 *tp)
+{
+ struct fw_info info;
+ int err, i;
+
+ info.text_base = TG3_TSO_FW_TEXT_ADDR;
+ info.text_len = TG3_TSO_FW_TEXT_LEN;
+ info.text_data = &tg3TsoFwText[0];
+ info.rodata_base = TG3_TSO_FW_RODATA_ADDR;
+ info.rodata_len = TG3_TSO_FW_RODATA_LEN;
+ info.rodata_data = &tg3TsoFwRodata[0];
+ info.data_base = TG3_TSO_FW_DATA_ADDR;
+ info.data_len = TG3_TSO_FW_DATA_LEN;
+ info.data_data = NULL;
+
+ err = tg3_load_firmware_cpu(tp, TX_CPU_BASE,
+ TX_CPU_SCRATCH_BASE, TX_CPU_SCRATCH_SIZE,
+ &info);
+ if (err)
+ return err;
+
+ /* Now startup only the TX cpu. */
+ tw32(TX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(TX_CPU_BASE + CPU_PC, TG3_TSO_FW_TEXT_ADDR);
+
+ /* Flush posted writes. */
+ tr32(TX_CPU_BASE + CPU_PC);
+ for (i = 0; i < 5; i++) {
+ if (tr32(TX_CPU_BASE + CPU_PC) == TG3_TSO_FW_TEXT_ADDR)
+ break;
+ tw32(TX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(TX_CPU_BASE + CPU_MODE, CPU_MODE_HALT);
+ tw32(TX_CPU_BASE + CPU_PC, TG3_TSO_FW_TEXT_ADDR);
+
+ /* Flush posted writes. */
+ tr32(TX_CPU_BASE + CPU_PC);
+
+ udelay(1000);
+ }
+ if (i >= 5) {
+ printk(KERN_ERR PFX "tg3_load_tso_firmware fails for %s "
+ "to set TX CPU PC, is %08x should be %08x\n",
+ tp->dev->name, tr32(TX_CPU_BASE + CPU_PC),
+ TG3_TSO_FW_TEXT_ADDR);
+ return -ENODEV;
+ }
+ tw32(TX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(TX_CPU_BASE + CPU_MODE, 0x00000000);
+
+ /* Flush posted writes. */
+ tr32(TX_CPU_BASE + CPU_MODE);
+
+ return 0;
+}
+
+#endif /* TG3_DO_TSO != 0 */
+
+/* tp->lock is held. */
+static void __tg3_set_mac_addr(struct tg3 *tp)
+{
+ u32 addr_high, addr_low;
+ int i;
+
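+ /* The MAC keeps four copies of the station address; program them
+ * all, then derive the transmit backoff seed from the byte sum.
+ */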
+ addr_high = ((tp->dev->dev_addr[0] << 8) |
+ tp->dev->dev_addr[1]);
+ addr_low = ((tp->dev->dev_addr[2] << 24) |
+ (tp->dev->dev_addr[3] << 16) |
+ (tp->dev->dev_addr[4] << 8) |
+ (tp->dev->dev_addr[5] << 0));
+ for (i = 0; i < 4; i++) {
+ tw32(MAC_ADDR_0_HIGH + (i * 8), addr_high);
+ tw32(MAC_ADDR_0_LOW + (i * 8), addr_low);
+ }
+
+ addr_high = (tp->dev->dev_addr[0] +
+ tp->dev->dev_addr[1] +
+ tp->dev->dev_addr[2] +
+ tp->dev->dev_addr[3] +
+ tp->dev->dev_addr[4] +
+ tp->dev->dev_addr[5]) &
+ TX_BACKOFF_SEED_MASK;
+ tw32(MAC_TX_BACKOFF_SEED, addr_high);
+}
+
+static int tg3_set_mac_addr(struct net_device *dev, void *p)
+{
+ struct tg3 *tp = dev->priv;
+ struct sockaddr *addr = p;
+
+ if (netif_running(dev))
+ return -EBUSY;
+
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+
+ spin_lock_irq(&tp->lock);
+ __tg3_set_mac_addr(tp);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+}
+
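+/* Write one TG3_BDINFO control block into NIC memory: the 64-bit host
+ * DMA address of the ring (high word then low word), the maxlen/flags
+ * word, and the ring's address in NIC SRAM.
+ */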
+/* tp->lock is held. */
+static void tg3_set_bdinfo(struct tg3 *tp, u32 bdinfo_addr,
+ dma_addr_t mapping, u32 maxlen_flags,
+ u32 nic_addr)
+{
+ tg3_write_mem(tp,
+ (bdinfo_addr +
+ TG3_BDINFO_HOST_ADDR +
+ TG3_64BIT_REG_HIGH),
+ ((u64) mapping >> 32));
+ tg3_write_mem(tp,
+ (bdinfo_addr +
+ TG3_BDINFO_HOST_ADDR +
+ TG3_64BIT_REG_LOW),
+ ((u64) mapping & 0xffffffff));
+ tg3_write_mem(tp,
+ (bdinfo_addr +
+ TG3_BDINFO_MAXLEN_FLAGS),
+ maxlen_flags);
+ tg3_write_mem(tp,
+ (bdinfo_addr +
+ TG3_BDINFO_NIC_ADDR),
+ nic_addr);
+}
+
+static void __tg3_set_rx_mode(struct net_device *);
+
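+/* Bring the chip from reset to fully programmed: reset the core, wait
+ * for the boot firmware handshake, program the buffer manager, rings,
+ * MAC address, host coalescing and DMA engines, then redo PHY setup.
+ */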
+/* tp->lock is held. */
+static int tg3_reset_hw(struct tg3 *tp)
+{
+ u32 val;
+ int i, err;
+
+ tg3_disable_ints(tp);
+
+ tg3_stop_fw(tp);
+
+ if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) {
+ err = tg3_abort_hw(tp);
+ if (err)
+ return err;
+ }
+
+ tg3_chip_reset(tp);
+
+ tw32(GRC_MODE, tp->grc_mode);
+ tg3_write_mem(tp,
+ NIC_SRAM_FIRMWARE_MBOX,
+ NIC_SRAM_FIRMWARE_MBOX_MAGIC1);
+ if (tp->phy_id == PHY_ID_SERDES) {
+ tp->mac_mode = MAC_MODE_PORT_MODE_TBI;
+ tw32(MAC_MODE, tp->mac_mode);
+ } else
+ tw32(MAC_MODE, 0);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ /* Wait for firmware initialization to complete. */
+ for (i = 0; i < 100000; i++) {
+ tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val);
+ if (val == ~NIC_SRAM_FIRMWARE_MBOX_MAGIC1)
+ break;
+ udelay(10);
+ }
+ if (i >= 100000) {
+ printk(KERN_ERR PFX "tg3_reset_hw timed out for %s, "
+ "firmware will not restart magic=%08x\n",
+ tp->dev->name, val);
+ return -ENODEV;
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF)
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_START);
+ else
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_SUSPEND);
+
+ /* This works around an issue with Athlon chipsets on
+ * B3 tigon3 silicon. This bit has no effect on any
+ * other revision.
+ */
+ val = tr32(TG3PCI_CLOCK_CTRL);
+ val |= CLOCK_CTRL_DELAY_PCI_GRANT;
+ tw32(TG3PCI_CLOCK_CTRL, val);
+ tr32(TG3PCI_CLOCK_CTRL);
+
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0 &&
+ (tp->tg3_flags & TG3_FLAG_PCIX_MODE)) {
+ val = tr32(TG3PCI_PCISTATE);
+ val |= PCISTATE_RETRY_SAME_DMA;
+ tw32(TG3PCI_PCISTATE, val);
+ }
+
+ /* Clear statistics/status block in chip, and status block in ram. */
+ for (i = NIC_SRAM_STATS_BLK;
+ i < NIC_SRAM_STATUS_BLK + TG3_HW_STATUS_SIZE;
+ i += sizeof(u32)) {
+ tg3_write_mem(tp, i, 0);
+ udelay(40);
+ }
+ memset(tp->hw_status, 0, TG3_HW_STATUS_SIZE);
+
+ /* This value is determined during the probe-time DMA
+ * engine test, tg3_test_dma.
+ */
+ tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+
+ tp->grc_mode &= ~(GRC_MODE_HOST_SENDBDS |
+ GRC_MODE_4X_NIC_SEND_RINGS |
+ GRC_MODE_NO_TX_PHDR_CSUM |
+ GRC_MODE_NO_RX_PHDR_CSUM);
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS)
+ tp->grc_mode |= GRC_MODE_HOST_SENDBDS;
+ else
+ tp->grc_mode |= GRC_MODE_4X_NIC_SEND_RINGS;
+ if (tp->tg3_flags & TG3_FLAG_NO_TX_PSEUDO_CSUM)
+ tp->grc_mode |= GRC_MODE_NO_TX_PHDR_CSUM;
+ if (tp->tg3_flags & TG3_FLAG_NO_RX_PSEUDO_CSUM)
+ tp->grc_mode |= GRC_MODE_NO_RX_PHDR_CSUM;
+
+ tw32(GRC_MODE,
+ tp->grc_mode |
+ (GRC_MODE_IRQ_ON_MAC_ATTN | GRC_MODE_HOST_STACKUP));
+
+ /* Set up the timer prescaler register. The clock is always 66 MHz. */
+ tw32(GRC_MISC_CFG,
+ (65 << GRC_MISC_CFG_PRESCALAR_SHIFT));
+
+ /* Initialize MBUF/DESC pool. */
+ tw32(BUFMGR_MB_POOL_ADDR, NIC_SRAM_MBUF_POOL_BASE);
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704)
+ tw32(BUFMGR_MB_POOL_SIZE, NIC_SRAM_MBUF_POOL_SIZE64);
+ else
+ tw32(BUFMGR_MB_POOL_SIZE, NIC_SRAM_MBUF_POOL_SIZE96);
+ tw32(BUFMGR_DMA_DESC_POOL_ADDR, NIC_SRAM_DMA_DESC_POOL_BASE);
+ tw32(BUFMGR_DMA_DESC_POOL_SIZE, NIC_SRAM_DMA_DESC_POOL_SIZE);
+
+ if (!(tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE)) {
+ tw32(BUFMGR_MB_RDMA_LOW_WATER,
+ tp->bufmgr_config.mbuf_read_dma_low_water);
+ tw32(BUFMGR_MB_MACRX_LOW_WATER,
+ tp->bufmgr_config.mbuf_mac_rx_low_water);
+ tw32(BUFMGR_MB_HIGH_WATER,
+ tp->bufmgr_config.mbuf_high_water);
+ } else {
+ tw32(BUFMGR_MB_RDMA_LOW_WATER,
+ tp->bufmgr_config.mbuf_read_dma_low_water_jumbo);
+ tw32(BUFMGR_MB_MACRX_LOW_WATER,
+ tp->bufmgr_config.mbuf_mac_rx_low_water_jumbo);
+ tw32(BUFMGR_MB_HIGH_WATER,
+ tp->bufmgr_config.mbuf_high_water_jumbo);
+ }
+ tw32(BUFMGR_DMA_LOW_WATER,
+ tp->bufmgr_config.dma_low_water);
+ tw32(BUFMGR_DMA_HIGH_WATER,
+ tp->bufmgr_config.dma_high_water);
+
+ tw32(BUFMGR_MODE, BUFMGR_MODE_ENABLE | BUFMGR_MODE_ATTN_ENABLE);
+ for (i = 0; i < 2000; i++) {
+ if (tr32(BUFMGR_MODE) & BUFMGR_MODE_ENABLE)
+ break;
+ udelay(10);
+ }
+ if (i >= 2000) {
+ printk(KERN_ERR PFX "tg3_reset_hw cannot enable BUFMGR for %s.\n",
+ tp->dev->name);
+ return -ENODEV;
+ }
+
+ tw32(FTQ_RESET, 0xffffffff);
+ tw32(FTQ_RESET, 0x00000000);
+ for (i = 0; i < 2000; i++) {
+ if (tr32(FTQ_RESET) == 0x00000000)
+ break;
+ udelay(10);
+ }
+ if (i >= 2000) {
+ printk(KERN_ERR PFX "tg3_reset_hw cannot reset FTQ for %s.\n",
+ tp->dev->name);
+ return -ENODEV;
+ }
+
+ /* Initialize TG3_BDINFO's at:
+ * RCVDBDI_STD_BD: standard eth size rx ring
+ * RCVDBDI_JUMBO_BD: jumbo frame rx ring
+ * RCVDBDI_MINI_BD: small frame rx ring (??? does not work)
+ *
+ * like so:
+ * TG3_BDINFO_HOST_ADDR: high/low parts of DMA address of ring
+ * TG3_BDINFO_MAXLEN_FLAGS: (rx max buffer size << 16) |
+ * ring attribute flags
+ * TG3_BDINFO_NIC_ADDR: location of descriptors in nic SRAM
+ *
+ * Standard receive ring @ NIC_SRAM_RX_BUFFER_DESC, 512 entries.
+ * Jumbo receive ring @ NIC_SRAM_RX_JUMBO_BUFFER_DESC, 256 entries.
+ *
+ * The size of each ring is fixed in the firmware, but the location is
+ * configurable.
+ */
+ tw32(RCVDBDI_STD_BD + TG3_BDINFO_HOST_ADDR + TG3_64BIT_REG_HIGH,
+ ((u64) tp->rx_std_mapping >> 32));
+ tw32(RCVDBDI_STD_BD + TG3_BDINFO_HOST_ADDR + TG3_64BIT_REG_LOW,
+ ((u64) tp->rx_std_mapping & 0xffffffff));
+ tw32(RCVDBDI_STD_BD + TG3_BDINFO_MAXLEN_FLAGS,
+ RX_STD_MAX_SIZE << BDINFO_FLAGS_MAXLEN_SHIFT);
+ tw32(RCVDBDI_STD_BD + TG3_BDINFO_NIC_ADDR,
+ NIC_SRAM_RX_BUFFER_DESC);
+
+ tw32(RCVDBDI_MINI_BD + TG3_BDINFO_MAXLEN_FLAGS,
+ BDINFO_FLAGS_DISABLED);
+
+ if (tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE) {
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_HOST_ADDR + TG3_64BIT_REG_HIGH,
+ ((u64) tp->rx_jumbo_mapping >> 32));
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_HOST_ADDR + TG3_64BIT_REG_LOW,
+ ((u64) tp->rx_jumbo_mapping & 0xffffffff));
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_MAXLEN_FLAGS,
+ RX_JUMBO_MAX_SIZE << BDINFO_FLAGS_MAXLEN_SHIFT);
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_NIC_ADDR,
+ NIC_SRAM_RX_JUMBO_BUFFER_DESC);
+ } else {
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_MAXLEN_FLAGS,
+ BDINFO_FLAGS_DISABLED);
+ }
+
+ /* Setup replenish thresholds. */
+ tw32(RCVBDI_STD_THRESH, tp->rx_pending / 8);
+ tw32(RCVBDI_JUMBO_THRESH, tp->rx_jumbo_pending / 8);
+
+ /* Clear out send RCB ring in SRAM. */
+ for (i = NIC_SRAM_SEND_RCB; i < NIC_SRAM_RCV_RET_RCB; i += TG3_BDINFO_SIZE)
+ tg3_write_mem(tp, i + TG3_BDINFO_MAXLEN_FLAGS, BDINFO_FLAGS_DISABLED);
+
+ tp->tx_prod = 0;
+ tp->tx_cons = 0;
+ tw32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW, 0);
+ tw32_mailbox(MAILBOX_SNDNIC_PROD_IDX_0 + TG3_64BIT_REG_LOW, 0);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 + TG3_64BIT_REG_LOW);
+
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ tg3_set_bdinfo(tp, NIC_SRAM_SEND_RCB,
+ tp->tx_desc_mapping,
+ (TG3_TX_RING_SIZE <<
+ BDINFO_FLAGS_MAXLEN_SHIFT),
+ NIC_SRAM_TX_BUFFER_DESC);
+ } else {
+ tg3_set_bdinfo(tp, NIC_SRAM_SEND_RCB,
+ 0,
+ BDINFO_FLAGS_DISABLED,
+ NIC_SRAM_TX_BUFFER_DESC);
+ }
+
+ for (i = NIC_SRAM_RCV_RET_RCB; i < NIC_SRAM_STATS_BLK; i += TG3_BDINFO_SIZE) {
+ tg3_write_mem(tp, i + TG3_BDINFO_MAXLEN_FLAGS,
+ BDINFO_FLAGS_DISABLED);
+ }
+
+ tp->rx_rcb_ptr = 0;
+ tw32_mailbox(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW, 0);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW);
+
+ tg3_set_bdinfo(tp, NIC_SRAM_RCV_RET_RCB,
+ tp->rx_rcb_mapping,
+ (TG3_RX_RCB_RING_SIZE <<
+ BDINFO_FLAGS_MAXLEN_SHIFT),
+ 0);
+
+ tp->rx_std_ptr = tp->rx_pending;
+ tw32_mailbox(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW,
+ tp->rx_std_ptr);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW);
+
+ if (tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE)
+ tp->rx_jumbo_ptr = tp->rx_jumbo_pending;
+ else
+ tp->rx_jumbo_ptr = 0;
+ tw32_mailbox(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW,
+ tp->rx_jumbo_ptr);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW);
+
+ /* Initialize MAC address and backoff seed. */
+ __tg3_set_mac_addr(tp);
+
+ /* MTU + ethernet header + FCS + optional VLAN tag */
+ tw32(MAC_RX_MTU_SIZE, tp->dev->mtu + ETH_HLEN + 8);
+
+ /* The slot time is changed by tg3_setup_phy if we
+ * run at gigabit with half duplex.
+ */
+ tw32(MAC_TX_LENGTHS,
+ (2 << TX_LENGTHS_IPG_CRS_SHIFT) |
+ (6 << TX_LENGTHS_IPG_SHIFT) |
+ (32 << TX_LENGTHS_SLOT_TIME_SHIFT));
+
+ /* Receive rules. */
+ tw32(MAC_RCV_RULE_CFG, RCV_RULE_CFG_DEFAULT_CLASS);
+ tw32(RCVLPC_CONFIG, 0x0181);
+
+ /* Receive/send statistics. */
+ tw32(RCVLPC_STATS_ENABLE, 0xffffff);
+ tw32(RCVLPC_STATSCTRL, RCVLPC_STATSCTRL_ENABLE);
+ tw32(SNDDATAI_STATSENAB, 0xffffff);
+ tw32(SNDDATAI_STATSCTRL,
+ (SNDDATAI_SCTRL_ENABLE |
+ SNDDATAI_SCTRL_FASTUPD));
+
+ /* Setup host coalescing engine. */
+ tw32(HOSTCC_MODE, 0);
+ for (i = 0; i < 2000; i++) {
+ if (!(tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE))
+ break;
+ udelay(10);
+ }
+
+ /* akw: these are all set back to the default coalescing values. */
+
+ tw32(HOSTCC_RXCOL_TICKS, DEFAULT_RXCOL_TICKS);
+ tw32(HOSTCC_RXMAX_FRAMES, DEFAULT_RXMAX_FRAMES);
+ tw32(HOSTCC_RXCOAL_TICK_INT, DEFAULT_RXCOAL_TICK_INT);
+ tw32(HOSTCC_RXCOAL_MAXF_INT, DEFAULT_RXCOAL_MAXF_INT);
+ tw32(HOSTCC_TXCOL_TICKS, DEFAULT_TXCOL_TICKS);
+ tw32(HOSTCC_TXMAX_FRAMES, DEFAULT_TXMAX_FRAMES);
+ tw32(HOSTCC_TXCOAL_TICK_INT, DEFAULT_TXCOAL_TICK_INT);
+ tw32(HOSTCC_TXCOAL_MAXF_INT, DEFAULT_TXCOAL_MAXF_INT);
+ tw32(HOSTCC_STAT_COAL_TICKS,
+ DEFAULT_STAT_COAL_TICKS);
+
+ /* Status/statistics block address. */
+ tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,
+ ((u64) tp->stats_mapping >> 32));
+ tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW,
+ ((u64) tp->stats_mapping & 0xffffffff));
+ tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,
+ ((u64) tp->status_mapping >> 32));
+ tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW,
+ ((u64) tp->status_mapping & 0xffffffff));
+ tw32(HOSTCC_STATS_BLK_NIC_ADDR, NIC_SRAM_STATS_BLK);
+ tw32(HOSTCC_STATUS_BLK_NIC_ADDR, NIC_SRAM_STATUS_BLK);
+
+ tw32(HOSTCC_MODE, HOSTCC_MODE_ENABLE | tp->coalesce_mode);
+
+ tw32(RCVCC_MODE, RCVCC_MODE_ENABLE | RCVCC_MODE_ATTN_ENABLE);
+ tw32(RCVLPC_MODE, RCVLPC_MODE_ENABLE);
+ tw32(RCVLSC_MODE, RCVLSC_MODE_ENABLE | RCVLSC_MODE_ATTN_ENABLE);
+
+ tp->mac_mode = MAC_MODE_TXSTAT_ENABLE | MAC_MODE_RXSTAT_ENABLE |
+ MAC_MODE_TDE_ENABLE | MAC_MODE_RDE_ENABLE | MAC_MODE_FHDE_ENABLE;
+ tw32(MAC_MODE, tp->mac_mode | MAC_MODE_RXSTAT_CLEAR | MAC_MODE_TXSTAT_CLEAR);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ tp->grc_local_ctrl = GRC_LCLCTRL_INT_ON_ATTN | GRC_LCLCTRL_AUTO_SEEPROM;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700)
+ tp->grc_local_ctrl |= (GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OUTPUT1);
+ tw32(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0);
+ tr32(MAILBOX_INTERRUPT_0);
+
+ tw32(DMAC_MODE, DMAC_MODE_ENABLE);
+ tr32(DMAC_MODE);
+ udelay(40);
+
+ tw32(WDMAC_MODE, (WDMAC_MODE_ENABLE | WDMAC_MODE_TGTABORT_ENAB |
+ WDMAC_MODE_MSTABORT_ENAB | WDMAC_MODE_PARITYERR_ENAB |
+ WDMAC_MODE_ADDROFLOW_ENAB | WDMAC_MODE_FIFOOFLOW_ENAB |
+ WDMAC_MODE_FIFOURUN_ENAB | WDMAC_MODE_FIFOOREAD_ENAB |
+ WDMAC_MODE_LNGREAD_ENAB));
+ tr32(WDMAC_MODE);
+ udelay(40);
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 &&
+ (tp->tg3_flags & TG3_FLAG_PCIX_MODE)) {
+ val = tr32(TG3PCI_X_CAPS);
+ val &= ~(PCIX_CAPS_SPLIT_MASK | PCIX_CAPS_BURST_MASK);
+ val |= (PCIX_CAPS_MAX_BURST_5704 << PCIX_CAPS_BURST_SHIFT);
+ if (tp->tg3_flags & TG3_FLAG_SPLIT_MODE)
+ val |= (tp->split_mode_max_reqs <<
+ PCIX_CAPS_SPLIT_SHIFT);
+ tw32(TG3PCI_X_CAPS, val);
+ }
+
+ val = (RDMAC_MODE_ENABLE | RDMAC_MODE_TGTABORT_ENAB |
+ RDMAC_MODE_MSTABORT_ENAB | RDMAC_MODE_PARITYERR_ENAB |
+ RDMAC_MODE_ADDROFLOW_ENAB | RDMAC_MODE_FIFOOFLOW_ENAB |
+ RDMAC_MODE_FIFOURUN_ENAB | RDMAC_MODE_FIFOOREAD_ENAB |
+ RDMAC_MODE_LNGREAD_ENAB);
+ if (tp->tg3_flags & TG3_FLAG_SPLIT_MODE)
+ val |= RDMAC_MODE_SPLIT_ENABLE;
+ tw32(RDMAC_MODE, val);
+ tr32(RDMAC_MODE);
+ udelay(40);
+
+ tw32(RCVDCC_MODE, RCVDCC_MODE_ENABLE | RCVDCC_MODE_ATTN_ENABLE);
+ tw32(MBFREE_MODE, MBFREE_MODE_ENABLE);
+ tw32(SNDDATAC_MODE, SNDDATAC_MODE_ENABLE);
+ tw32(SNDBDC_MODE, SNDBDC_MODE_ENABLE | SNDBDC_MODE_ATTN_ENABLE);
+ tw32(RCVBDI_MODE, RCVBDI_MODE_ENABLE | RCVBDI_MODE_RCB_ATTN_ENAB);
+ tw32(RCVDBDI_MODE, RCVDBDI_MODE_ENABLE | RCVDBDI_MODE_INV_RING_SZ);
+ tw32(SNDDATAI_MODE, SNDDATAI_MODE_ENABLE);
+ tw32(SNDBDI_MODE, SNDBDI_MODE_ENABLE | SNDBDI_MODE_ATTN_ENABLE);
+ tw32(SNDBDS_MODE, SNDBDS_MODE_ENABLE | SNDBDS_MODE_ATTN_ENABLE);
+
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0) {
+ err = tg3_load_5701_a0_firmware_fix(tp);
+ if (err)
+ return err;
+ }
+
+#if TG3_DO_TSO != 0
+ err = tg3_load_tso_firmware(tp);
+ if (err)
+ return err;
+#endif
+
+ tp->tx_mode = TX_MODE_ENABLE;
+ tw32(MAC_TX_MODE, tp->tx_mode);
+ tr32(MAC_TX_MODE);
+ udelay(100);
+
+ tp->rx_mode = RX_MODE_ENABLE;
+ tw32(MAC_RX_MODE, tp->rx_mode);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+
+ if (tp->link_config.phy_is_low_power) {
+ tp->link_config.phy_is_low_power = 0;
+ tp->link_config.speed = tp->link_config.orig_speed;
+ tp->link_config.duplex = tp->link_config.orig_duplex;
+ tp->link_config.autoneg = tp->link_config.orig_autoneg;
+ }
+
+ tp->mi_mode = MAC_MI_MODE_BASE;
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+
+ tw32(MAC_LED_CTRL, 0);
+ tw32(MAC_MI_STAT, MAC_MI_STAT_LNKSTAT_ATTN_ENAB);
+ tw32(MAC_RX_MODE, RX_MODE_RESET);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+ tw32(MAC_RX_MODE, tp->rx_mode);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5703_A1)
+ tw32(MAC_SERDES_CFG, 0x616000);
+
+ err = tg3_setup_phy(tp);
+ if (err)
+ return err;
+
+ if (tp->phy_id != PHY_ID_SERDES) {
+ u32 tmp;
+
+ /* Clear CRC stats. */
+ tg3_readphy(tp, 0x1e, &tmp);
+ tg3_writephy(tp, 0x1e, tmp | 0x8000);
+ tg3_readphy(tp, 0x14, &tmp);
+ }
+
+ __tg3_set_rx_mode(tp->dev);
+
+ /* Initialize receive rules. */
+ tw32(MAC_RCV_RULE_0, 0xc2000000 & RCV_RULE_DISABLE_MASK);
+ tw32(MAC_RCV_VALUE_0, 0xffffffff & RCV_RULE_DISABLE_MASK);
+ tw32(MAC_RCV_RULE_1, 0x86000004 & RCV_RULE_DISABLE_MASK);
+ tw32(MAC_RCV_VALUE_1, 0xffffffff & RCV_RULE_DISABLE_MASK);
+#if 0
+ tw32(MAC_RCV_RULE_2, 0); tw32(MAC_RCV_VALUE_2, 0);
+ tw32(MAC_RCV_RULE_3, 0); tw32(MAC_RCV_VALUE_3, 0);
+#endif
+ tw32(MAC_RCV_RULE_4, 0); tw32(MAC_RCV_VALUE_4, 0);
+ tw32(MAC_RCV_RULE_5, 0); tw32(MAC_RCV_VALUE_5, 0);
+ tw32(MAC_RCV_RULE_6, 0); tw32(MAC_RCV_VALUE_6, 0);
+ tw32(MAC_RCV_RULE_7, 0); tw32(MAC_RCV_VALUE_7, 0);
+ tw32(MAC_RCV_RULE_8, 0); tw32(MAC_RCV_VALUE_8, 0);
+ tw32(MAC_RCV_RULE_9, 0); tw32(MAC_RCV_VALUE_9, 0);
+ tw32(MAC_RCV_RULE_10, 0); tw32(MAC_RCV_VALUE_10, 0);
+ tw32(MAC_RCV_RULE_11, 0); tw32(MAC_RCV_VALUE_11, 0);
+ tw32(MAC_RCV_RULE_12, 0); tw32(MAC_RCV_VALUE_12, 0);
+ tw32(MAC_RCV_RULE_13, 0); tw32(MAC_RCV_VALUE_13, 0);
+ tw32(MAC_RCV_RULE_14, 0); tw32(MAC_RCV_VALUE_14, 0);
+ tw32(MAC_RCV_RULE_15, 0); tw32(MAC_RCV_VALUE_15, 0);
+
+ if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE)
+ tg3_enable_ints(tp);
+
+ return 0;
+}
+
+/* Called at device open time to get the chip ready for
+ * packet processing. Invoked with tp->lock held.
+ */
+static int tg3_init_hw(struct tg3 *tp)
+{
+ int err;
+
+ /* Force the chip into D0. */
+ err = tg3_set_power_state(tp, 0);
+ if (err)
+ goto out;
+
+ tg3_switch_clocks(tp);
+
+ tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0);
+
+ err = tg3_reset_hw(tp);
+
+out:
+ return err;
+}
+
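+/* Periodic driver timer, re-armed every tp->timer_offset jiffies
+ * (HZ/10 at open time).  It kicks the IRQ/status-block handshake,
+ * checks link state roughly once per second, and sends the ASF
+ * heartbeat every 120 seconds.
+ */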
+static void tg3_timer(unsigned long __opaque)
+{
+ struct tg3 *tp = (struct tg3 *) __opaque;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->lock, flags);
+ spin_lock(&tp->tx_lock);
+
+ /* All of this garbage is necessary because, when using
+ * non-tagged IRQ status, the mailbox/status_block protocol
+ * the chip uses with the cpu is race prone.
+ */
+ if (tp->hw_status->status & SD_STATUS_UPDATED) {
+ tw32(GRC_LOCAL_CTRL,
+ tp->grc_local_ctrl | GRC_LCLCTRL_SETINT);
+ } else {
+ tw32(HOSTCC_MODE, tp->coalesce_mode |
+ (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
+ }
+
+ if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
+ tg3_halt(tp);
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+ }
+
+ /* This part only runs once per second. */
+ if (!--tp->timer_counter) {
+ if (tp->tg3_flags & TG3_FLAG_USE_LINKCHG_REG) {
+ u32 mac_stat;
+ int phy_event;
+
+ mac_stat = tr32(MAC_STATUS);
+
+ phy_event = 0;
+ if (tp->tg3_flags & TG3_FLAG_USE_MI_INTERRUPT) {
+ if (mac_stat & MAC_STATUS_MI_INTERRUPT)
+ phy_event = 1;
+ } else if (mac_stat & MAC_STATUS_LNKSTATE_CHANGED)
+ phy_event = 1;
+
+ if (phy_event)
+ tg3_setup_phy(tp);
+ } else if (tp->tg3_flags & TG3_FLAG_POLL_SERDES) {
+ u32 mac_stat = tr32(MAC_STATUS);
+ int need_setup = 0;
+
+ if (netif_carrier_ok(tp->dev) &&
+ (mac_stat & MAC_STATUS_LNKSTATE_CHANGED)) {
+ need_setup = 1;
+ }
+ if (!netif_carrier_ok(tp->dev) &&
+ (mac_stat & MAC_STATUS_PCS_SYNCED)) {
+ need_setup = 1;
+ }
+ if (need_setup) {
+ tw32(MAC_MODE,
+ (tp->mac_mode &
+ ~MAC_MODE_PORT_MODE_MASK));
+ tr32(MAC_MODE);
+ udelay(40);
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+ tg3_setup_phy(tp);
+ }
+ }
+
+ tp->timer_counter = tp->timer_multiplier;
+ }
+
+ /* Heartbeat is only sent once every 120 seconds. */
+ if (!--tp->asf_counter) {
+ if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) {
+ u32 val;
+
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_ALIVE);
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_LEN_MBOX, 4);
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX, 3);
+ val = tr32(GRC_RX_CPU_EVENT);
+ val |= (1 << 14);
+ tw32(GRC_RX_CPU_EVENT, val);
+ }
+ tp->asf_counter = tp->asf_multiplier;
+ }
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irqrestore(&tp->lock, flags);
+
+ tp->timer.expires = jiffies + tp->timer_offset;
+ add_timer(&tp->timer);
+}
+
+static int tg3_open(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ int err;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_disable_ints(tp);
+ tp->tg3_flags &= ~TG3_FLAG_INIT_COMPLETE;
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ /* If you move this call, make sure TG3_FLAG_HOST_TXDS in
+ * tp->tg3_flags is still accurate at the new location.
+ */
+ err = tg3_alloc_consistent(tp);
+ if (err)
+ return err;
+
+ err = request_irq(dev->irq, tg3_interrupt,
+ SA_SHIRQ, dev->name, dev);
+
+ if (err) {
+ tg3_free_consistent(tp);
+ return err;
+ }
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_init_rings(tp);
+
+ err = tg3_init_hw(tp);
+ if (err) {
+ tg3_halt(tp);
+ tg3_free_rings(tp);
+ } else {
+ tp->timer_offset = HZ / 10;
+ tp->timer_counter = tp->timer_multiplier = 10;
+ tp->asf_counter = tp->asf_multiplier = (10 * 120);
+
+ init_timer(&tp->timer);
+ tp->timer.expires = jiffies + tp->timer_offset;
+ tp->timer.data = (unsigned long) tp;
+ tp->timer.function = tg3_timer;
+ add_timer(&tp->timer);
+
+ tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
+ }
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ if (err) {
+ free_irq(dev->irq, dev);
+ tg3_free_consistent(tp);
+ return err;
+ }
+
+ netif_start_queue(dev);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_enable_ints(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+}
+
+#if 0
+/*static*/ void tg3_dump_state(struct tg3 *tp)
+{
+ u32 val32, val32_2, val32_3, val32_4, val32_5;
+ u16 val16;
+ int i;
+
+ pci_read_config_word(tp->pdev, PCI_STATUS, &val16);
+ pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE, &val32);
+ printk("DEBUG: PCI status [%04x] TG3PCI state[%08x]\n",
+ val16, val32);
+
+ /* MAC block */
+ printk("DEBUG: MAC_MODE[%08x] MAC_STATUS[%08x]\n",
+ tr32(MAC_MODE), tr32(MAC_STATUS));
+ printk(" MAC_EVENT[%08x] MAC_LED_CTRL[%08x]\n",
+ tr32(MAC_EVENT), tr32(MAC_LED_CTRL));
+ printk("DEBUG: MAC_TX_MODE[%08x] MAC_TX_STATUS[%08x]\n",
+ tr32(MAC_TX_MODE), tr32(MAC_TX_STATUS));
+ printk(" MAC_RX_MODE[%08x] MAC_RX_STATUS[%08x]\n",
+ tr32(MAC_RX_MODE), tr32(MAC_RX_STATUS));
+
+ /* Send data initiator control block */
+ printk("DEBUG: SNDDATAI_MODE[%08x] SNDDATAI_STATUS[%08x]\n",
+ tr32(SNDDATAI_MODE), tr32(SNDDATAI_STATUS));
+ printk(" SNDDATAI_STATSCTRL[%08x]\n",
+ tr32(SNDDATAI_STATSCTRL));
+
+ /* Send data completion control block */
+ printk("DEBUG: SNDDATAC_MODE[%08x]\n", tr32(SNDDATAC_MODE));
+
+ /* Send BD ring selector block */
+ printk("DEBUG: SNDBDS_MODE[%08x] SNDBDS_STATUS[%08x]\n",
+ tr32(SNDBDS_MODE), tr32(SNDBDS_STATUS));
+
+ /* Send BD initiator control block */
+ printk("DEBUG: SNDBDI_MODE[%08x] SNDBDI_STATUS[%08x]\n",
+ tr32(SNDBDI_MODE), tr32(SNDBDI_STATUS));
+
+ /* Send BD completion control block */
+ printk("DEBUG: SNDBDC_MODE[%08x]\n", tr32(SNDBDC_MODE));
+
+ /* Receive list placement control block */
+ printk("DEBUG: RCVLPC_MODE[%08x] RCVLPC_STATUS[%08x]\n",
+ tr32(RCVLPC_MODE), tr32(RCVLPC_STATUS));
+ printk(" RCVLPC_STATSCTRL[%08x]\n",
+ tr32(RCVLPC_STATSCTRL));
+
+ /* Receive data and receive BD initiator control block */
+ printk("DEBUG: RCVDBDI_MODE[%08x] RCVDBDI_STATUS[%08x]\n",
+ tr32(RCVDBDI_MODE), tr32(RCVDBDI_STATUS));
+
+ /* Receive data completion control block */
+ printk("DEBUG: RCVDCC_MODE[%08x]\n",
+ tr32(RCVDCC_MODE));
+
+ /* Receive BD initiator control block */
+ printk("DEBUG: RCVBDI_MODE[%08x] RCVBDI_STATUS[%08x]\n",
+ tr32(RCVBDI_MODE), tr32(RCVBDI_STATUS));
+
+ /* Receive BD completion control block */
+ printk("DEBUG: RCVCC_MODE[%08x] RCVCC_STATUS[%08x]\n",
+ tr32(RCVCC_MODE), tr32(RCVCC_STATUS));
+
+ /* Receive list selector control block */
+ printk("DEBUG: RCVLSC_MODE[%08x] RCVLSC_STATUS[%08x]\n",
+ tr32(RCVLSC_MODE), tr32(RCVLSC_STATUS));
+
+ /* Mbuf cluster free block */
+ printk("DEBUG: MBFREE_MODE[%08x] MBFREE_STATUS[%08x]\n",
+ tr32(MBFREE_MODE), tr32(MBFREE_STATUS));
+
+ /* Host coalescing control block */
+ printk("DEBUG: HOSTCC_MODE[%08x] HOSTCC_STATUS[%08x]\n",
+ tr32(HOSTCC_MODE), tr32(HOSTCC_STATUS));
+ printk("DEBUG: HOSTCC_STATS_BLK_HOST_ADDR[%08x%08x]\n",
+ tr32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH),
+ tr32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW));
+ printk("DEBUG: HOSTCC_STATUS_BLK_HOST_ADDR[%08x%08x]\n",
+ tr32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH),
+ tr32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW));
+ printk("DEBUG: HOSTCC_STATS_BLK_NIC_ADDR[%08x]\n",
+ tr32(HOSTCC_STATS_BLK_NIC_ADDR));
+ printk("DEBUG: HOSTCC_STATUS_BLK_NIC_ADDR[%08x]\n",
+ tr32(HOSTCC_STATUS_BLK_NIC_ADDR));
+
+ /* Memory arbiter control block */
+ printk("DEBUG: MEMARB_MODE[%08x] MEMARB_STATUS[%08x]\n",
+ tr32(MEMARB_MODE), tr32(MEMARB_STATUS));
+
+ /* Buffer manager control block */
+ printk("DEBUG: BUFMGR_MODE[%08x] BUFMGR_STATUS[%08x]\n",
+ tr32(BUFMGR_MODE), tr32(BUFMGR_STATUS));
+ printk("DEBUG: BUFMGR_MB_POOL_ADDR[%08x] BUFMGR_MB_POOL_SIZE[%08x]\n",
+ tr32(BUFMGR_MB_POOL_ADDR), tr32(BUFMGR_MB_POOL_SIZE));
+ printk("DEBUG: BUFMGR_DMA_DESC_POOL_ADDR[%08x] "
+ "BUFMGR_DMA_DESC_POOL_SIZE[%08x]\n",
+ tr32(BUFMGR_DMA_DESC_POOL_ADDR),
+ tr32(BUFMGR_DMA_DESC_POOL_SIZE));
+
+ /* Read DMA control block */
+ printk("DEBUG: RDMAC_MODE[%08x] RDMAC_STATUS[%08x]\n",
+ tr32(RDMAC_MODE), tr32(RDMAC_STATUS));
+
+ /* Write DMA control block */
+ printk("DEBUG: WDMAC_MODE[%08x] WDMAC_STATUS[%08x]\n",
+ tr32(WDMAC_MODE), tr32(WDMAC_STATUS));
+
+ /* DMA completion block */
+ printk("DEBUG: DMAC_MODE[%08x]\n",
+ tr32(DMAC_MODE));
+
+ /* GRC block */
+ printk("DEBUG: GRC_MODE[%08x] GRC_MISC_CFG[%08x]\n",
+ tr32(GRC_MODE), tr32(GRC_MISC_CFG));
+ printk("DEBUG: GRC_LOCAL_CTRL[%08x]\n",
+ tr32(GRC_LOCAL_CTRL));
+
+ /* TG3_BDINFOs */
+ printk("DEBUG: RCVDBDI_JUMBO_BD[%08x%08x:%08x:%08x]\n",
+ tr32(RCVDBDI_JUMBO_BD + 0x0),
+ tr32(RCVDBDI_JUMBO_BD + 0x4),
+ tr32(RCVDBDI_JUMBO_BD + 0x8),
+ tr32(RCVDBDI_JUMBO_BD + 0xc));
+ printk("DEBUG: RCVDBDI_STD_BD[%08x%08x:%08x:%08x]\n",
+ tr32(RCVDBDI_STD_BD + 0x0),
+ tr32(RCVDBDI_STD_BD + 0x4),
+ tr32(RCVDBDI_STD_BD + 0x8),
+ tr32(RCVDBDI_STD_BD + 0xc));
+ printk("DEBUG: RCVDBDI_MINI_BD[%08x%08x:%08x:%08x]\n",
+ tr32(RCVDBDI_MINI_BD + 0x0),
+ tr32(RCVDBDI_MINI_BD + 0x4),
+ tr32(RCVDBDI_MINI_BD + 0x8),
+ tr32(RCVDBDI_MINI_BD + 0xc));
+
+ tg3_read_mem(tp, NIC_SRAM_SEND_RCB + 0x0, &val32);
+ tg3_read_mem(tp, NIC_SRAM_SEND_RCB + 0x4, &val32_2);
+ tg3_read_mem(tp, NIC_SRAM_SEND_RCB + 0x8, &val32_3);
+ tg3_read_mem(tp, NIC_SRAM_SEND_RCB + 0xc, &val32_4);
+ printk("DEBUG: SRAM_SEND_RCB_0[%08x%08x:%08x:%08x]\n",
+ val32, val32_2, val32_3, val32_4);
+
+ tg3_read_mem(tp, NIC_SRAM_RCV_RET_RCB + 0x0, &val32);
+ tg3_read_mem(tp, NIC_SRAM_RCV_RET_RCB + 0x4, &val32_2);
+ tg3_read_mem(tp, NIC_SRAM_RCV_RET_RCB + 0x8, &val32_3);
+ tg3_read_mem(tp, NIC_SRAM_RCV_RET_RCB + 0xc, &val32_4);
+ printk("DEBUG: SRAM_RCV_RET_RCB_0[%08x%08x:%08x:%08x]\n",
+ val32, val32_2, val32_3, val32_4);
+
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0x0, &val32);
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0x4, &val32_2);
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0x8, &val32_3);
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0xc, &val32_4);
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0x10, &val32_5);
+ printk("DEBUG: SRAM_STATUS_BLK[%08x:%08x:%08x:%08x:%08x]\n",
+ val32, val32_2, val32_3, val32_4, val32_5);
+
+ /* SW status block */
+ printk("DEBUG: Host status block [%08x:%08x:(%04x:%04x:%04x):(%04x:%04x)]\n",
+ tp->hw_status->status,
+ tp->hw_status->status_tag,
+ tp->hw_status->rx_jumbo_consumer,
+ tp->hw_status->rx_consumer,
+ tp->hw_status->rx_mini_consumer,
+ tp->hw_status->idx[0].rx_producer,
+ tp->hw_status->idx[0].tx_consumer);
+
+ /* SW statistics block */
+ printk("DEBUG: Host statistics block [%08x:%08x:%08x:%08x]\n",
+ ((u32 *)tp->hw_stats)[0],
+ ((u32 *)tp->hw_stats)[1],
+ ((u32 *)tp->hw_stats)[2],
+ ((u32 *)tp->hw_stats)[3]);
+
+ /* Mailboxes */
+ printk("DEBUG: SNDHOST_PROD[%08x%08x] SNDNIC_PROD[%08x%08x]\n",
+ tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x0),
+ tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x4),
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x0),
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x4));
+
+ /* NIC side send descriptors. */
+ for (i = 0; i < 6; i++) {
+ unsigned long txd;
+
+ txd = tp->regs + NIC_SRAM_WIN_BASE + NIC_SRAM_TX_BUFFER_DESC
+ + (i * sizeof(struct tg3_tx_buffer_desc));
+ printk("DEBUG: NIC TXD(%d)[%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(txd + 0x0), readl(txd + 0x4),
+ readl(txd + 0x8), readl(txd + 0xc));
+ }
+
+ /* NIC side RX descriptors. */
+ for (i = 0; i < 6; i++) {
+ unsigned long rxd;
+
+ rxd = tp->regs + NIC_SRAM_WIN_BASE + NIC_SRAM_RX_BUFFER_DESC
+ + (i * sizeof(struct tg3_rx_buffer_desc));
+ printk("DEBUG: NIC RXD_STD(%d)[0][%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(rxd + 0x0), readl(rxd + 0x4),
+ readl(rxd + 0x8), readl(rxd + 0xc));
+ rxd += (4 * sizeof(u32));
+ printk("DEBUG: NIC RXD_STD(%d)[1][%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(rxd + 0x0), readl(rxd + 0x4),
+ readl(rxd + 0x8), readl(rxd + 0xc));
+ }
+
+ for (i = 0; i < 6; i++) {
+ unsigned long rxd;
+
+ rxd = tp->regs + NIC_SRAM_WIN_BASE + NIC_SRAM_RX_JUMBO_BUFFER_DESC
+ + (i * sizeof(struct tg3_rx_buffer_desc));
+ printk("DEBUG: NIC RXD_JUMBO(%d)[0][%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(rxd + 0x0), readl(rxd + 0x4),
+ readl(rxd + 0x8), readl(rxd + 0xc));
+ rxd += (4 * sizeof(u32));
+ printk("DEBUG: NIC RXD_JUMBO(%d)[1][%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(rxd + 0x0), readl(rxd + 0x4),
+ readl(rxd + 0x8), readl(rxd + 0xc));
+ }
+}
+#endif
+
+static struct net_device_stats *tg3_get_stats(struct net_device *);
+
+static int tg3_close(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+
+ netif_stop_queue(dev);
+
+ del_timer_sync(&tp->timer);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+#if 0
+ tg3_dump_state(tp);
+#endif
+
+ tg3_disable_ints(tp);
+
+ tg3_halt(tp);
+ tg3_free_rings(tp);
+ tp->tg3_flags &=
+ ~(TG3_FLAG_INIT_COMPLETE |
+ TG3_FLAG_GOT_SERDES_FLOWCTL);
+ netif_carrier_off(tp->dev);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ free_irq(dev->irq, dev);
+
+ memcpy(&tp->net_stats_prev, tg3_get_stats(tp->dev),
+ sizeof(tp->net_stats_prev));
+
+ tg3_free_consistent(tp);
+
+ return 0;
+}
+
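+/* The chip's statistics counters are 64-bit values; on a 32-bit host
+ * only the low word fits in an unsigned long.
+ */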
+static inline unsigned long get_stat64(tg3_stat64_t *val)
+{
+ unsigned long ret;
+
+#if (BITS_PER_LONG == 32)
+ ret = val->low;
+#else
+ ret = ((u64)val->high << 32) | ((u64)val->low);
+#endif
+ return ret;
+}
+
+static unsigned long calc_crc_errors(struct tg3 *tp)
+{
+ struct tg3_hw_stats *hw_stats = tp->hw_stats;
+
+ if (tp->phy_id != PHY_ID_SERDES &&
+ (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)) {
+ unsigned long flags;
+ u32 val;
+
+ spin_lock_irqsave(&tp->lock, flags);
+ tg3_readphy(tp, 0x1e, &val);
+ tg3_writephy(tp, 0x1e, val | 0x8000);
+ tg3_readphy(tp, 0x14, &val);
+ spin_unlock_irqrestore(&tp->lock, flags);
+
+ tp->phy_crc_errors += val;
+
+ return tp->phy_crc_errors;
+ }
+
+ return get_stat64(&hw_stats->rx_fcs_errors);
+}
+
+static struct net_device_stats *tg3_get_stats(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ struct net_device_stats *stats = &tp->net_stats;
+ struct net_device_stats *old_stats = &tp->net_stats_prev;
+ struct tg3_hw_stats *hw_stats = tp->hw_stats;
+
+ if (!hw_stats)
+ return old_stats;
+
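+ /* Fold the live hardware counters into the totals captured at
+ * the last close (net_stats_prev), so statistics survive an
+ * interface down/up cycle.
+ */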
+ stats->rx_packets = old_stats->rx_packets +
+ get_stat64(&hw_stats->rx_ucast_packets) +
+ get_stat64(&hw_stats->rx_mcast_packets) +
+ get_stat64(&hw_stats->rx_bcast_packets);
+
+ stats->tx_packets = old_stats->tx_packets +
+ get_stat64(&hw_stats->COS_out_packets[0]);
+
+ stats->rx_bytes = old_stats->rx_bytes +
+ get_stat64(&hw_stats->rx_octets);
+ stats->tx_bytes = old_stats->tx_bytes +
+ get_stat64(&hw_stats->tx_octets);
+
+ stats->rx_errors = old_stats->rx_errors +
+ get_stat64(&hw_stats->rx_errors);
+ stats->tx_errors = old_stats->tx_errors +
+ get_stat64(&hw_stats->tx_errors) +
+ get_stat64(&hw_stats->tx_mac_errors) +
+ get_stat64(&hw_stats->tx_carrier_sense_errors) +
+ get_stat64(&hw_stats->tx_discards);
+
+ stats->multicast = old_stats->multicast +
+ get_stat64(&hw_stats->rx_mcast_packets);
+ stats->collisions = old_stats->collisions +
+ get_stat64(&hw_stats->tx_collisions);
+
+ stats->rx_length_errors = old_stats->rx_length_errors +
+ get_stat64(&hw_stats->rx_frame_too_long_errors) +
+ get_stat64(&hw_stats->rx_undersize_packets);
+
+ stats->rx_over_errors = old_stats->rx_over_errors +
+ get_stat64(&hw_stats->rxbds_empty);
+ stats->rx_frame_errors = old_stats->rx_frame_errors +
+ get_stat64(&hw_stats->rx_align_errors);
+ stats->tx_aborted_errors = old_stats->tx_aborted_errors +
+ get_stat64(&hw_stats->tx_discards);
+ stats->tx_carrier_errors = old_stats->tx_carrier_errors +
+ get_stat64(&hw_stats->tx_carrier_sense_errors);
+
+ stats->rx_crc_errors = old_stats->rx_crc_errors +
+ calc_crc_errors(tp);
+
+ return stats;
+}
+
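+/* Bitwise CRC-32 (the reflected IEEE 802.3 polynomial, 0xedb88320)
+ * over len bytes, returned complemented.  The multicast filter code
+ * below re-inverts it and uses the low 7 bits as a hash.
+ */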
+static inline u32 calc_crc(unsigned char *buf, int len)
+{
+ u32 reg;
+ u32 tmp;
+ int j, k;
+
+ reg = 0xffffffff;
+
+ for (j = 0; j < len; j++) {
+ reg ^= buf[j];
+
+ for (k = 0; k < 8; k++) {
+ tmp = reg & 0x01;
+
+ reg >>= 1;
+
+ if (tmp) {
+ reg ^= 0xedb88320;
+ }
+ }
+ }
+
+ return ~reg;
+}
+
+static void tg3_set_multi(struct tg3 *tp, unsigned int accept_all)
+{
+ /* accept or reject all multicast frames */
+ tw32(MAC_HASH_REG_0, accept_all ? 0xffffffff : 0);
+ tw32(MAC_HASH_REG_1, accept_all ? 0xffffffff : 0);
+ tw32(MAC_HASH_REG_2, accept_all ? 0xffffffff : 0);
+ tw32(MAC_HASH_REG_3, accept_all ? 0xffffffff : 0);
+}
+
+static void __tg3_set_rx_mode(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ u32 rx_mode;
+
+ rx_mode = tp->rx_mode & ~(RX_MODE_PROMISC |
+ RX_MODE_KEEP_VLAN_TAG);
+#if TG3_VLAN_TAG_USED
+ if (!tp->vlgrp)
+ rx_mode |= RX_MODE_KEEP_VLAN_TAG;
+#else
+ /* By definition, VLAN is always disabled in this
+ * case.
+ */
+ rx_mode |= RX_MODE_KEEP_VLAN_TAG;
+#endif
+
+ if (dev->flags & IFF_PROMISC) {
+ /* Promiscuous mode. */
+ rx_mode |= RX_MODE_PROMISC;
+ } else if (dev->flags & IFF_ALLMULTI) {
+ /* Accept all multicast. */
+ tg3_set_multi (tp, 1);
+ } else if (dev->mc_count < 1) {
+ /* Reject all multicast. */
+ tg3_set_multi (tp, 0);
+ } else {
+ /* Accept one or more multicast(s). */
+ struct dev_mc_list *mclist;
+ unsigned int i;
+ u32 mc_filter[4] = { 0, };
+ u32 regidx;
+ u32 bit;
+ u32 crc;
+
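+ /* Hash each address into one of 128 filter bits: bits 6:5
+ * of the inverted CRC pick one of the four 32-bit hash
+ * registers, bits 4:0 pick the bit within it.
+ */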
+ for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
+ i++, mclist = mclist->next) {
+
+ crc = calc_crc (mclist->dmi_addr, ETH_ALEN);
+ bit = ~crc & 0x7f;
+ regidx = (bit & 0x60) >> 5;
+ bit &= 0x1f;
+ mc_filter[regidx] |= (1 << bit);
+ }
+
+ tw32(MAC_HASH_REG_0, mc_filter[0]);
+ tw32(MAC_HASH_REG_1, mc_filter[1]);
+ tw32(MAC_HASH_REG_2, mc_filter[2]);
+ tw32(MAC_HASH_REG_3, mc_filter[3]);
+ }
+
+ if (rx_mode != tp->rx_mode) {
+ tp->rx_mode = rx_mode;
+ tw32(MAC_RX_MODE, rx_mode);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+ }
+}
+
+static void tg3_set_rx_mode(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+
+ spin_lock_irq(&tp->lock);
+ __tg3_set_rx_mode(dev);
+ spin_unlock_irq(&tp->lock);
+}
+
+#define TG3_REGDUMP_LEN (32 * 1024)
+
+static u8 *tg3_get_regs(struct tg3 *tp)
+{
+ u8 *orig_p = kmalloc(TG3_REGDUMP_LEN, GFP_KERNEL);
+ u8 *p;
+ int i;
+
+ if (orig_p == NULL)
+ return NULL;
+
+ memset(orig_p, 0, TG3_REGDUMP_LEN);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
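+/* Snapshot helpers: copy registers into the dump buffer at the same
+ * offsets they occupy in register space, advancing p as they go.
+ */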
+#define __GET_REG32(reg) (*((u32 *)(p))++ = tr32(reg))
+#define GET_REG32_LOOP(base,len) \
+do { p = orig_p + (base); \
+ for (i = 0; i < len; i += 4) \
+ __GET_REG32((base) + i); \
+} while (0)
+#define GET_REG32_1(reg) \
+do { p = orig_p + (reg); \
+ __GET_REG32((reg)); \
+} while (0)
+
+ GET_REG32_LOOP(TG3PCI_VENDOR, 0xb0);
+ GET_REG32_LOOP(MAILBOX_INTERRUPT_0, 0x200);
+ GET_REG32_LOOP(MAC_MODE, 0x4f0);
+ GET_REG32_LOOP(SNDDATAI_MODE, 0xe0);
+ GET_REG32_1(SNDDATAC_MODE);
+ GET_REG32_LOOP(SNDBDS_MODE, 0x80);
+ GET_REG32_LOOP(SNDBDI_MODE, 0x48);
+ GET_REG32_1(SNDBDC_MODE);
+ GET_REG32_LOOP(RCVLPC_MODE, 0x20);
+ GET_REG32_LOOP(RCVLPC_SELLST_BASE, 0x15c);
+ GET_REG32_LOOP(RCVDBDI_MODE, 0x0c);
+ GET_REG32_LOOP(RCVDBDI_JUMBO_BD, 0x3c);
+ GET_REG32_LOOP(RCVDBDI_BD_PROD_IDX_0, 0x44);
+ GET_REG32_1(RCVDCC_MODE);
+ GET_REG32_LOOP(RCVBDI_MODE, 0x20);
+ GET_REG32_LOOP(RCVCC_MODE, 0x14);
+ GET_REG32_LOOP(RCVLSC_MODE, 0x08);
+ GET_REG32_1(MBFREE_MODE);
+ GET_REG32_LOOP(HOSTCC_MODE, 0x100);
+ GET_REG32_LOOP(MEMARB_MODE, 0x10);
+ GET_REG32_LOOP(BUFMGR_MODE, 0x58);
+ GET_REG32_LOOP(RDMAC_MODE, 0x08);
+ GET_REG32_LOOP(WDMAC_MODE, 0x08);
+ GET_REG32_LOOP(RX_CPU_BASE, 0x280);
+ GET_REG32_LOOP(TX_CPU_BASE, 0x280);
+ GET_REG32_LOOP(GRCMBOX_INTERRUPT_0, 0x110);
+ GET_REG32_LOOP(FTQ_RESET, 0x120);
+ GET_REG32_LOOP(MSGINT_MODE, 0x0c);
+ GET_REG32_1(DMAC_MODE);
+ GET_REG32_LOOP(GRC_MODE, 0x4c);
+ GET_REG32_LOOP(NVRAM_CMD, 0x24);
+
+#undef __GET_REG32
+#undef GET_REG32_LOOP
+#undef GET_REG32_1
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return orig_p;
+}
+
+static int tg3_ethtool_ioctl (struct net_device *dev, void *useraddr)
+{
+ struct tg3 *tp = dev->priv;
+ struct pci_dev *pci_dev = tp->pdev;
+ u32 ethcmd;
+
+ if (copy_from_user (&ethcmd, useraddr, sizeof (ethcmd)))
+ return -EFAULT;
+
+ switch (ethcmd) {
+ case ETHTOOL_GDRVINFO:{
+ struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO };
+ strcpy (info.driver, DRV_MODULE_NAME);
+ strcpy (info.version, DRV_MODULE_VERSION);
+ memset(&info.fw_version, 0, sizeof(info.fw_version));
+ strcpy (info.bus_info, pci_dev->slot_name);
+ info.eedump_len = 0;
+ info.regdump_len = TG3_REGDUMP_LEN;
+ if (copy_to_user (useraddr, &info, sizeof (info)))
+ return -EFAULT;
+ return 0;
+ }
+
+ case ETHTOOL_GSET: {
+ struct ethtool_cmd cmd = { ETHTOOL_GSET };
+
+ if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) ||
+ tp->link_config.phy_is_low_power)
+ return -EAGAIN;
+ cmd.supported = (SUPPORTED_Autoneg);
+
+ if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY))
+ cmd.supported |= (SUPPORTED_1000baseT_Half |
+ SUPPORTED_1000baseT_Full);
+
+ if (tp->phy_id != PHY_ID_SERDES)
+ cmd.supported |= (SUPPORTED_100baseT_Half |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_10baseT_Half |
+ SUPPORTED_10baseT_Full |
+ SUPPORTED_MII);
+ else
+ cmd.supported |= SUPPORTED_FIBRE;
+
+ cmd.advertising = tp->link_config.advertising;
+ cmd.speed = tp->link_config.active_speed;
+ cmd.duplex = tp->link_config.active_duplex;
+ cmd.port = 0;
+ cmd.phy_address = PHY_ADDR;
+ cmd.transceiver = 0;
+ cmd.autoneg = tp->link_config.autoneg;
+ cmd.maxtxpkt = 0;
+ cmd.maxrxpkt = 0;
+ if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SSET: {
+ struct ethtool_cmd cmd;
+
+ if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) ||
+ tp->link_config.phy_is_low_power)
+ return -EAGAIN;
+
+ if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+ return -EFAULT;
+
+ /* Fiber PHY only supports 1000 full/half */
+ if (cmd.autoneg == AUTONEG_ENABLE) {
+ if (tp->phy_id == PHY_ID_SERDES &&
+ (cmd.advertising &
+ (ADVERTISED_10baseT_Half |
+ ADVERTISED_10baseT_Full |
+ ADVERTISED_100baseT_Half |
+ ADVERTISED_100baseT_Full)))
+ return -EINVAL;
+ if ((tp->tg3_flags & TG3_FLAG_10_100_ONLY) &&
+ (cmd.advertising &
+ (ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full)))
+ return -EINVAL;
+ } else {
+ if (tp->phy_id == PHY_ID_SERDES &&
+ (cmd.speed == SPEED_10 ||
+ cmd.speed == SPEED_100))
+ return -EINVAL;
+ if ((tp->tg3_flags & TG3_FLAG_10_100_ONLY) &&
+ (cmd.speed == SPEED_10 ||
+ cmd.speed == SPEED_100))
+ return -EINVAL;
+ }
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tp->link_config.autoneg = cmd.autoneg;
+ if (cmd.autoneg == AUTONEG_ENABLE) {
+ tp->link_config.advertising = cmd.advertising;
+ tp->link_config.speed = SPEED_INVALID;
+ tp->link_config.duplex = DUPLEX_INVALID;
+ } else {
+ tp->link_config.speed = cmd.speed;
+ tp->link_config.duplex = cmd.duplex;
+ }
+
+ tg3_setup_phy(tp);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+
+ case ETHTOOL_GREGS: {
+ struct ethtool_regs regs;
+ u8 *regbuf;
+ int ret;
+
+ if (copy_from_user(&regs, useraddr, sizeof(regs)))
+ return -EFAULT;
+ if (regs.len > TG3_REGDUMP_LEN)
+ regs.len = TG3_REGDUMP_LEN;
+ regs.version = 0;
+ if (copy_to_user(useraddr, &regs, sizeof(regs)))
+ return -EFAULT;
+
+ regbuf = tg3_get_regs(tp);
+ if (!regbuf)
+ return -ENOMEM;
+
+ useraddr += offsetof(struct ethtool_regs, data);
+ ret = 0;
+ if (copy_to_user(useraddr, regbuf, regs.len))
+ ret = -EFAULT;
+ kfree(regbuf);
+ return ret;
+ }
+ case ETHTOOL_GWOL: {
+ struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
+
+ wol.supported = WAKE_MAGIC;
+ wol.wolopts = 0;
+ if (tp->tg3_flags & TG3_FLAG_WOL_ENABLE)
+ wol.wolopts = WAKE_MAGIC;
+ memset(&wol.sopass, 0, sizeof(wol.sopass));
+ if (copy_to_user(useraddr, &wol, sizeof(wol)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SWOL: {
+ struct ethtool_wolinfo wol;
+
+ if (copy_from_user(&wol, useraddr, sizeof(wol)))
+ return -EFAULT;
+ if (wol.wolopts & ~WAKE_MAGIC)
+ return -EINVAL;
+ if ((wol.wolopts & WAKE_MAGIC) &&
+ tp->phy_id == PHY_ID_SERDES &&
+ !(tp->tg3_flags & TG3_FLAG_SERDES_WOL_CAP))
+ return -EINVAL;
+
+ spin_lock_irq(&tp->lock);
+ if (wol.wolopts & WAKE_MAGIC)
+ tp->tg3_flags |= TG3_FLAG_WOL_ENABLE;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_WOL_ENABLE;
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+ case ETHTOOL_GMSGLVL: {
+ struct ethtool_value edata = { ETHTOOL_GMSGLVL };
+ edata.data = tp->msg_enable;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SMSGLVL: {
+ struct ethtool_value edata;
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+ tp->msg_enable = edata.data;
+ return 0;
+ }
+ case ETHTOOL_NWAY_RST: {
+ u32 bmcr;
+ int r;
+
+ spin_lock_irq(&tp->lock);
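+		/* BMCR is read twice; presumably the first read is
+		 * discarded to flush a stale value from the PHY.
+		 */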
+ tg3_readphy(tp, MII_BMCR, &bmcr);
+ tg3_readphy(tp, MII_BMCR, &bmcr);
+ r = -EINVAL;
+ if (bmcr & BMCR_ANENABLE) {
+ tg3_writephy(tp, MII_BMCR,
+ bmcr | BMCR_ANRESTART);
+ r = 0;
+ }
+ spin_unlock_irq(&tp->lock);
+
+ return r;
+ }
+ case ETHTOOL_GLINK: {
+ struct ethtool_value edata = { ETHTOOL_GLINK };
+ edata.data = netif_carrier_ok(tp->dev) ? 1 : 0;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_GRINGPARAM: {
+ struct ethtool_ringparam ering = { ETHTOOL_GRINGPARAM };
+
+ ering.rx_max_pending = TG3_RX_RING_SIZE - 1;
+ ering.rx_mini_max_pending = 0;
+ ering.rx_jumbo_max_pending = TG3_RX_JUMBO_RING_SIZE - 1;
+
+ ering.rx_pending = tp->rx_pending;
+ ering.rx_mini_pending = 0;
+ ering.rx_jumbo_pending = tp->rx_jumbo_pending;
+ ering.tx_pending = tp->tx_pending;
+
+ if (copy_to_user(useraddr, &ering, sizeof(ering)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SRINGPARAM: {
+ struct ethtool_ringparam ering;
+
+ if (copy_from_user(&ering, useraddr, sizeof(ering)))
+ return -EFAULT;
+
+ if ((ering.rx_pending > TG3_RX_RING_SIZE - 1) ||
+ (ering.rx_jumbo_pending > TG3_RX_JUMBO_RING_SIZE - 1) ||
+ (ering.tx_pending > TG3_TX_RING_SIZE - 1))
+ return -EINVAL;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tp->rx_pending = ering.rx_pending;
+ tp->rx_jumbo_pending = ering.rx_jumbo_pending;
+ tp->tx_pending = ering.tx_pending;
+
+ tg3_halt(tp);
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+ netif_wake_queue(tp->dev);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+ case ETHTOOL_GPAUSEPARAM: {
+ struct ethtool_pauseparam epause = { ETHTOOL_GPAUSEPARAM };
+
+ epause.autoneg =
+ (tp->tg3_flags & TG3_FLAG_PAUSE_AUTONEG) != 0;
+ epause.rx_pause =
+ (tp->tg3_flags & TG3_FLAG_PAUSE_RX) != 0;
+ epause.tx_pause =
+ (tp->tg3_flags & TG3_FLAG_PAUSE_TX) != 0;
+ if (copy_to_user(useraddr, &epause, sizeof(epause)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SPAUSEPARAM: {
+ struct ethtool_pauseparam epause;
+
+ if (copy_from_user(&epause, useraddr, sizeof(epause)))
+ return -EFAULT;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+ if (epause.autoneg)
+ tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_PAUSE_AUTONEG;
+ if (epause.rx_pause)
+ tp->tg3_flags |= TG3_FLAG_PAUSE_RX;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_PAUSE_RX;
+ if (epause.tx_pause)
+ tp->tg3_flags |= TG3_FLAG_PAUSE_TX;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_PAUSE_TX;
+ tg3_halt(tp);
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+ case ETHTOOL_GRXCSUM: {
+ struct ethtool_value edata = { ETHTOOL_GRXCSUM };
+
+ edata.data =
+ (tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) != 0;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SRXCSUM: {
+ struct ethtool_value edata;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ if (tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) {
+ if (edata.data != 0)
+ return -EINVAL;
+ return 0;
+ }
+
+ spin_lock_irq(&tp->lock);
+ if (edata.data)
+ tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS;
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+ case ETHTOOL_GTXCSUM: {
+ struct ethtool_value edata = { ETHTOOL_GTXCSUM };
+
+ edata.data =
+ (tp->dev->features & NETIF_F_IP_CSUM) != 0;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_STXCSUM: {
+ struct ethtool_value edata;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ if (tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) {
+ if (edata.data != 0)
+ return -EINVAL;
+ return 0;
+ }
+
+ if (edata.data)
+ tp->dev->features |= NETIF_F_IP_CSUM;
+ else
+ tp->dev->features &= ~NETIF_F_IP_CSUM;
+
+ return 0;
+ }
+ case ETHTOOL_GSG: {
+ struct ethtool_value edata = { ETHTOOL_GSG };
+
+ edata.data =
+ (tp->dev->features & NETIF_F_SG) != 0;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SSG: {
+ struct ethtool_value edata;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ if (edata.data)
+ tp->dev->features |= NETIF_F_SG;
+ else
+ tp->dev->features &= ~NETIF_F_SG;
+
+ return 0;
+ }
+	}
+
+ return -EOPNOTSUPP;
+}
+
+static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data;
+ struct tg3 *tp = dev->priv;
+ int err;
+
+ switch(cmd) {
+ case SIOCETHTOOL:
+ return tg3_ethtool_ioctl(dev, (void *) ifr->ifr_data);
+ case SIOCGMIIPHY:
+ data->phy_id = PHY_ADDR;
+
+ /* fallthru */
+ case SIOCGMIIREG: {
+ u32 mii_regval;
+
+ spin_lock_irq(&tp->lock);
+ err = tg3_readphy(tp, data->reg_num & 0x1f, &mii_regval);
+ spin_unlock_irq(&tp->lock);
+
+ data->val_out = mii_regval;
+
+ return err;
+ }
+
+ case SIOCSMIIREG:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ spin_lock_irq(&tp->lock);
+ err = tg3_writephy(tp, data->reg_num & 0x1f, data->val_in);
+ spin_unlock_irq(&tp->lock);
+
+ return err;
+
+ default:
+ /* do nothing */
+ break;
+ }
+ return -EOPNOTSUPP;
+}
+
+#if TG3_VLAN_TAG_USED
+static void tg3_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
+{
+ struct tg3 *tp = dev->priv;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tp->vlgrp = grp;
+
+ /* Update RX_MODE_KEEP_VLAN_TAG bit in RX_MODE register. */
+ __tg3_set_rx_mode(dev);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+}
+
+static void tg3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+{
+ struct tg3 *tp = dev->priv;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+ if (tp->vlgrp)
+ tp->vlgrp->vlan_devices[vid] = NULL;
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+}
+#endif
+
+/* Chips other than 5700/5701 use the NVRAM for fetching info. */
+static void __devinit tg3_nvram_init(struct tg3 *tp)
+{
+ int j;
+
+ tw32(GRC_EEPROM_ADDR,
+ (EEPROM_ADDR_FSM_RESET |
+ (EEPROM_DEFAULT_CLOCK_PERIOD <<
+ EEPROM_ADDR_CLKPERD_SHIFT)));
+
+ /* XXX schedule_timeout() ... */
+ for (j = 0; j < 100; j++)
+ udelay(10);
+
+ /* Enable seeprom accesses. */
+ tw32(GRC_LOCAL_CTRL,
+ tr32(GRC_LOCAL_CTRL) | GRC_LCLCTRL_AUTO_SEEPROM);
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 &&
+ GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701) {
+ u32 nvcfg1 = tr32(NVRAM_CFG1);
+
+ tp->tg3_flags |= TG3_FLAG_NVRAM;
+ if (nvcfg1 & NVRAM_CFG1_FLASHIF_ENAB) {
+ if (nvcfg1 & NVRAM_CFG1_BUFFERED_MODE)
+ tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
+ } else {
+ nvcfg1 &= ~NVRAM_CFG1_COMPAT_BYPASS;
+ tw32(NVRAM_CFG1, nvcfg1);
+ }
+
+ } else {
+ tp->tg3_flags &= ~(TG3_FLAG_NVRAM | TG3_FLAG_NVRAM_BUFFERED);
+ }
+}
+
+static int __devinit tg3_nvram_read_using_eeprom(struct tg3 *tp,
+ u32 offset, u32 *val)
+{
+ u32 tmp;
+ int i;
+
+ if (offset > EEPROM_ADDR_ADDR_MASK ||
+ (offset % 4) != 0)
+ return -EINVAL;
+
+ tmp = tr32(GRC_EEPROM_ADDR) & ~(EEPROM_ADDR_ADDR_MASK |
+ EEPROM_ADDR_DEVID_MASK |
+ EEPROM_ADDR_READ);
+ tw32(GRC_EEPROM_ADDR,
+ tmp |
+ (0 << EEPROM_ADDR_DEVID_SHIFT) |
+ ((offset << EEPROM_ADDR_ADDR_SHIFT) &
+ EEPROM_ADDR_ADDR_MASK) |
+ EEPROM_ADDR_READ | EEPROM_ADDR_START);
+
+ for (i = 0; i < 10000; i++) {
+ tmp = tr32(GRC_EEPROM_ADDR);
+
+ if (tmp & EEPROM_ADDR_COMPLETE)
+ break;
+ udelay(100);
+ }
+ if (!(tmp & EEPROM_ADDR_COMPLETE))
+ return -EBUSY;
+
+ *val = tr32(GRC_EEPROM_DATA);
+ return 0;
+}
+
+static int __devinit tg3_nvram_read(struct tg3 *tp,
+ u32 offset, u32 *val)
+{
+ int i, saw_done_clear;
+
+ if (!(tp->tg3_flags & TG3_FLAG_NVRAM))
+ return tg3_nvram_read_using_eeprom(tp, offset, val);
+
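+	/* Buffered flash parts address data as (page index << page-bit
+	 * position) | offset-within-page, so remap the linear offset.
+	 */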
+ if (tp->tg3_flags & TG3_FLAG_NVRAM_BUFFERED)
+ offset = ((offset / NVRAM_BUFFERED_PAGE_SIZE) <<
+ NVRAM_BUFFERED_PAGE_POS) +
+ (offset % NVRAM_BUFFERED_PAGE_SIZE);
+
+ if (offset > NVRAM_ADDR_MSK)
+ return -EINVAL;
+
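+	/* Grab the NVRAM software arbitration grant before issuing the
+	 * read; it is released again below once the data is latched.
+	 */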
+ tw32(NVRAM_SWARB, SWARB_REQ_SET1);
+ for (i = 0; i < 1000; i++) {
+ if (tr32(NVRAM_SWARB) & SWARB_GNT1)
+ break;
+ udelay(20);
+ }
+
+ tw32(NVRAM_ADDR, offset);
+ tw32(NVRAM_CMD,
+ NVRAM_CMD_RD | NVRAM_CMD_GO |
+ NVRAM_CMD_FIRST | NVRAM_CMD_LAST | NVRAM_CMD_DONE);
+
+ /* Wait for done bit to clear then set again. */
+ saw_done_clear = 0;
+ for (i = 0; i < 1000; i++) {
+ udelay(10);
+ if (!saw_done_clear &&
+ !(tr32(NVRAM_CMD) & NVRAM_CMD_DONE))
+ saw_done_clear = 1;
+ else if (saw_done_clear &&
+ (tr32(NVRAM_CMD) & NVRAM_CMD_DONE))
+ break;
+ }
+ if (i >= 1000) {
+ tw32(NVRAM_SWARB, SWARB_REQ_CLR1);
+ return -EBUSY;
+ }
+
+ *val = swab32(tr32(NVRAM_RDDATA));
+	tw32(NVRAM_SWARB, SWARB_REQ_CLR1);
+
+ return 0;
+}
+
+struct subsys_tbl_ent {
+ u16 subsys_vendor, subsys_devid;
+ u32 phy_id;
+};
+
+static struct subsys_tbl_ent subsys_id_to_phy_id[] = {
+ /* Broadcom boards. */
+ { 0x14e4, 0x1644, PHY_ID_BCM5401 }, /* BCM95700A6 */
+ { 0x14e4, 0x0001, PHY_ID_BCM5701 }, /* BCM95701A5 */
+ { 0x14e4, 0x0002, PHY_ID_BCM8002 }, /* BCM95700T6 */
+ { 0x14e4, 0x0003, PHY_ID_SERDES }, /* BCM95700A9 */
+ { 0x14e4, 0x0005, PHY_ID_BCM5701 }, /* BCM95701T1 */
+ { 0x14e4, 0x0006, PHY_ID_BCM5701 }, /* BCM95701T8 */
+ { 0x14e4, 0x0007, PHY_ID_SERDES }, /* BCM95701A7 */
+ { 0x14e4, 0x0008, PHY_ID_BCM5701 }, /* BCM95701A10 */
+ { 0x14e4, 0x8008, PHY_ID_BCM5701 }, /* BCM95701A12 */
+ { 0x14e4, 0x0009, PHY_ID_BCM5701 }, /* BCM95703Ax1 */
+ { 0x14e4, 0x8009, PHY_ID_BCM5701 }, /* BCM95703Ax2 */
+
+ /* 3com boards. */
+ { PCI_VENDOR_ID_3COM, 0x1000, PHY_ID_BCM5401 }, /* 3C996T */
+ { PCI_VENDOR_ID_3COM, 0x1006, PHY_ID_BCM5701 }, /* 3C996BT */
+ /* { PCI_VENDOR_ID_3COM, 0x1002, PHY_ID_XXX }, 3C996CT */
+ /* { PCI_VENDOR_ID_3COM, 0x1003, PHY_ID_XXX }, 3C997T */
+ { PCI_VENDOR_ID_3COM, 0x1004, PHY_ID_SERDES }, /* 3C996SX */
+ /* { PCI_VENDOR_ID_3COM, 0x1005, PHY_ID_XXX }, 3C997SZ */
+ { PCI_VENDOR_ID_3COM, 0x1007, PHY_ID_BCM5701 }, /* 3C1000T */
+ { PCI_VENDOR_ID_3COM, 0x1008, PHY_ID_BCM5701 }, /* 3C940BR01 */
+
+ /* DELL boards. */
+ { PCI_VENDOR_ID_DELL, 0x00d1, PHY_ID_BCM5401 }, /* VIPER */
+ { PCI_VENDOR_ID_DELL, 0x0106, PHY_ID_BCM5401 }, /* JAGUAR */
+ { PCI_VENDOR_ID_DELL, 0x0109, PHY_ID_BCM5411 }, /* MERLOT */
+ { PCI_VENDOR_ID_DELL, 0x010a, PHY_ID_BCM5411 }, /* SLIM_MERLOT */
+
+ /* Compaq boards. */
+ { PCI_VENDOR_ID_COMPAQ, 0x007c, PHY_ID_BCM5701 }, /* BANSHEE */
+ { PCI_VENDOR_ID_COMPAQ, 0x009a, PHY_ID_BCM5701 }, /* BANSHEE_2 */
+ { PCI_VENDOR_ID_COMPAQ, 0x007d, PHY_ID_SERDES }, /* CHANGELING */
+ { PCI_VENDOR_ID_COMPAQ, 0x0085, PHY_ID_BCM5701 }, /* NC7780 */
+ { PCI_VENDOR_ID_COMPAQ, 0x0099, PHY_ID_BCM5701 } /* NC7780_2 */
+};
+
+static int __devinit tg3_phy_probe(struct tg3 *tp)
+{
+ u32 eeprom_phy_id, hw_phy_id_1, hw_phy_id_2;
+ u32 hw_phy_id, hw_phy_id_masked;
+ enum phy_led_mode eeprom_led_mode;
+ u32 val;
+ int i, eeprom_signature_found, err;
+
+ tp->phy_id = PHY_ID_INVALID;
+ for (i = 0; i < ARRAY_SIZE(subsys_id_to_phy_id); i++) {
+ if ((subsys_id_to_phy_id[i].subsys_vendor ==
+ tp->pdev->subsystem_vendor) &&
+ (subsys_id_to_phy_id[i].subsys_devid ==
+ tp->pdev->subsystem_device)) {
+ tp->phy_id = subsys_id_to_phy_id[i].phy_id;
+ break;
+ }
+ }
+
+ eeprom_phy_id = PHY_ID_INVALID;
+ eeprom_led_mode = led_mode_auto;
+ eeprom_signature_found = 0;
+ tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val);
+ if (val == NIC_SRAM_DATA_SIG_MAGIC) {
+ u32 nic_cfg;
+
+ tg3_read_mem(tp, NIC_SRAM_DATA_CFG, &nic_cfg);
+
+ eeprom_signature_found = 1;
+
+ if ((nic_cfg & NIC_SRAM_DATA_CFG_PHY_TYPE_MASK) ==
+ NIC_SRAM_DATA_CFG_PHY_TYPE_FIBER) {
+ eeprom_phy_id = PHY_ID_SERDES;
+ } else {
+ u32 nic_phy_id;
+
+ tg3_read_mem(tp, NIC_SRAM_DATA_PHY_ID, &nic_phy_id);
+ if (nic_phy_id != 0) {
+ u32 id1 = nic_phy_id & NIC_SRAM_DATA_PHY_ID1_MASK;
+ u32 id2 = nic_phy_id & NIC_SRAM_DATA_PHY_ID2_MASK;
+
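+				/* Recompose the driver's PHY id layout
+				 * from the two halves stored in NIC
+				 * SRAM; same packing as the MII_PHYSID1/
+				 * MII_PHYSID2 composition below.
+				 */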
+ eeprom_phy_id = (id1 >> 16) << 10;
+ eeprom_phy_id |= (id2 & 0xfc00) << 16;
+ eeprom_phy_id |= (id2 & 0x03ff) << 0;
+ }
+ }
+
+ switch (nic_cfg & NIC_SRAM_DATA_CFG_LED_MODE_MASK) {
+ case NIC_SRAM_DATA_CFG_LED_TRIPLE_SPD:
+ eeprom_led_mode = led_mode_three_link;
+ break;
+
+ case NIC_SRAM_DATA_CFG_LED_LINK_SPD:
+ eeprom_led_mode = led_mode_link10;
+ break;
+
+ default:
+ eeprom_led_mode = led_mode_auto;
+ break;
+		}
+ if ((tp->pci_chip_rev_id == CHIPREV_ID_5703_A1 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5703_A2) &&
+ (nic_cfg & NIC_SRAM_DATA_CFG_EEPROM_WP))
+ tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT;
+
+ if (nic_cfg & NIC_SRAM_DATA_CFG_ASF_ENABLE)
+ tp->tg3_flags |= TG3_FLAG_ENABLE_ASF;
+ if (nic_cfg & NIC_SRAM_DATA_CFG_FIBER_WOL)
+ tp->tg3_flags |= TG3_FLAG_SERDES_WOL_CAP;
+ }
+
+ /* Now read the physical PHY_ID from the chip and verify
+	 * that it is sane. If it doesn't look good, we fall back
+	 * to the hard-coded table based PHY_ID or, failing that,
+	 * the value found in the eeprom area.
+ */
+ err = tg3_readphy(tp, MII_PHYSID1, &hw_phy_id_1);
+ err |= tg3_readphy(tp, MII_PHYSID2, &hw_phy_id_2);
+
+ hw_phy_id = (hw_phy_id_1 & 0xffff) << 10;
+ hw_phy_id |= (hw_phy_id_2 & 0xfc00) << 16;
+ hw_phy_id |= (hw_phy_id_2 & 0x03ff) << 0;
+
+ hw_phy_id_masked = hw_phy_id & PHY_ID_MASK;
+
+ if (!err && KNOWN_PHY_ID(hw_phy_id_masked)) {
+ tp->phy_id = hw_phy_id;
+ } else {
+ /* phy_id currently holds the value found in the
+ * subsys_id_to_phy_id[] table or PHY_ID_INVALID
+ * if a match was not found there.
+ */
+ if (tp->phy_id == PHY_ID_INVALID) {
+ if (!eeprom_signature_found ||
+ !KNOWN_PHY_ID(eeprom_phy_id & PHY_ID_MASK))
+ return -ENODEV;
+ tp->phy_id = eeprom_phy_id;
+ }
+ }
+
+ err = tg3_phy_reset(tp, 1);
+ if (err)
+ return err;
+
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0) {
+ u32 mii_tg3_ctrl;
+
+ /* These chips, when reset, only advertise 10Mb
+ * capabilities. Fix that.
+ */
+ err = tg3_writephy(tp, MII_ADVERTISE,
+ (ADVERTISE_CSMA |
+ ADVERTISE_PAUSE_CAP |
+ ADVERTISE_10HALF |
+ ADVERTISE_10FULL |
+ ADVERTISE_100HALF |
+ ADVERTISE_100FULL));
+ mii_tg3_ctrl = (MII_TG3_CTRL_ADV_1000_HALF |
+ MII_TG3_CTRL_ADV_1000_FULL |
+ MII_TG3_CTRL_AS_MASTER |
+ MII_TG3_CTRL_ENABLE_AS_MASTER);
+ if (tp->tg3_flags & TG3_FLAG_10_100_ONLY)
+ mii_tg3_ctrl = 0;
+
+ err |= tg3_writephy(tp, MII_TG3_CTRL, mii_tg3_ctrl);
+ err |= tg3_writephy(tp, MII_BMCR,
+ (BMCR_ANRESTART | BMCR_ANENABLE));
+ }
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703) {
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c00);
+ tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x201f);
+ tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x2aaa);
+ }
+
+ if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) &&
+ (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0)) {
+ tg3_writephy(tp, 0x1c, 0x8d68);
+ tg3_writephy(tp, 0x1c, 0x8d68);
+ }
+
+ /* Enable Ethernet@WireSpeed */
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x7007);
+ tg3_readphy(tp, MII_TG3_AUX_CTRL, &val);
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, (val | (1 << 15) | (1 << 4)));
+
+ if (!err && ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401)) {
+ err = tg3_init_5401phy_dsp(tp);
+ }
+
+ /* Determine the PHY led mode. */
+ if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) {
+ tp->led_mode = led_mode_link10;
+ } else {
+ tp->led_mode = led_mode_three_link;
+ if (eeprom_signature_found &&
+ eeprom_led_mode != led_mode_auto)
+ tp->led_mode = eeprom_led_mode;
+ }
+
+ if (tp->phy_id == PHY_ID_SERDES)
+ tp->link_config.advertising =
+ (ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full |
+ ADVERTISED_Autoneg |
+ ADVERTISED_FIBRE);
+ if (tp->tg3_flags & TG3_FLAG_10_100_ONLY)
+ tp->link_config.advertising &=
+ ~(ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full);
+
+ return err;
+}
+
+static void __devinit tg3_read_partno(struct tg3 *tp)
+{
+ unsigned char vpd_data[256];
+ int i;
+
+ for (i = 0; i < 256; i += 4) {
+ u32 tmp;
+
+ if (tg3_nvram_read(tp, 0x100 + i, &tmp))
+ goto out_not_found;
+
+ vpd_data[i + 0] = ((tmp >> 0) & 0xff);
+ vpd_data[i + 1] = ((tmp >> 8) & 0xff);
+ vpd_data[i + 2] = ((tmp >> 16) & 0xff);
+ vpd_data[i + 3] = ((tmp >> 24) & 0xff);
+ }
+
+ /* Now parse and find the part number. */
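+	/* VPD is a list of tagged resources: 0x82 (identifier string)
+	 * and 0x91 (read/write area) are skipped; 0x90 is the read-only
+	 * area whose "PN" keyword carries the board part number.
+	 */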
+ for (i = 0; i < 256; ) {
+ unsigned char val = vpd_data[i];
+ int block_end;
+
+ if (val == 0x82 || val == 0x91) {
+ i = (i + 3 +
+ (vpd_data[i + 1] +
+ (vpd_data[i + 2] << 8)));
+ continue;
+ }
+
+ if (val != 0x90)
+ goto out_not_found;
+
+ block_end = (i + 3 +
+ (vpd_data[i + 1] +
+ (vpd_data[i + 2] << 8)));
+ i += 3;
+ while (i < block_end) {
+ if (vpd_data[i + 0] == 'P' &&
+ vpd_data[i + 1] == 'N') {
+ int partno_len = vpd_data[i + 2];
+
+ if (partno_len > 24)
+ goto out_not_found;
+
+ memcpy(tp->board_part_number,
+ &vpd_data[i + 3],
+ partno_len);
+
+ /* Success. */
+ return;
+			}
+
+			/* Advance past this keyword (2 name bytes, 1
+			 * length byte, then the data); without this the
+			 * scan never terminates when "PN" is absent.
+			 */
+			i += 3 + vpd_data[i + 2];
+		}
+
+ /* Part number not found. */
+ goto out_not_found;
+ }
+
+out_not_found:
+ strcpy(tp->board_part_number, "none");
+}
+
+static int __devinit tg3_get_invariants(struct tg3 *tp)
+{
+ u32 misc_ctrl_reg;
+ u32 cacheline_sz_reg;
+ u32 pci_state_reg, grc_misc_cfg;
+ u16 pci_cmd;
+ int err;
+
+ /* If we have an AMD 762 or Intel ICH/ICH0 chipset, write
+ * reordering to the mailbox registers done by the host
+ * controller can cause major troubles. We read back from
+ * every mailbox register write to force the writes to be
+ * posted to the chip in order.
+ */
+ if (pci_find_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82801AA_8, NULL) ||
+ pci_find_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82801AB_8, NULL) ||
+ pci_find_device(PCI_VENDOR_ID_AMD,
+ PCI_DEVICE_ID_AMD_FE_GATE_700C, NULL))
+ tp->tg3_flags |= TG3_FLAG_MBOX_WRITE_REORDER;
+
+ /* Force memory write invalidate off. If we leave it on,
+ * then on 5700_BX chips we have to enable a workaround.
+	 * The workaround is to set the TG3PCI_DMA_RW_CTRL boundary
+	 * to match the cacheline size. The Broadcom driver has this
+	 * workaround but turns MWI off at all times so it is never
+	 * used, which suggests the workaround is insufficient.
+ */
+ pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd);
+ pci_cmd &= ~PCI_COMMAND_INVALIDATE;
+ pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd);
+
+ /* It is absolutely critical that TG3PCI_MISC_HOST_CTRL
+ * has the register indirect write enable bit set before
+ * we try to access any of the MMIO registers. It is also
+ * critical that the PCI-X hw workaround situation is decided
+ * before that as well.
+ */
+ pci_read_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL,
+ &misc_ctrl_reg);
+
+ tp->pci_chip_rev_id = (misc_ctrl_reg >>
+ MISC_HOST_CTRL_CHIPREV_SHIFT);
+
+ /* Initialize misc host control in PCI block. */
+ tp->misc_host_ctrl |= (misc_ctrl_reg &
+ MISC_HOST_CTRL_CHIPREV);
+ pci_write_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL,
+ tp->misc_host_ctrl);
+
+ pci_read_config_dword(tp->pdev, TG3PCI_CACHELINESZ,
+ &cacheline_sz_reg);
+
+ tp->pci_cacheline_sz = (cacheline_sz_reg >> 0) & 0xff;
+ tp->pci_lat_timer = (cacheline_sz_reg >> 8) & 0xff;
+ tp->pci_hdr_type = (cacheline_sz_reg >> 16) & 0xff;
+ tp->pci_bist = (cacheline_sz_reg >> 24) & 0xff;
+
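+	/* On 5703, enforce a minimum PCI latency timer of 64 cycles by
+	 * rewriting the packed cacheline/latency/header/BIST config word.
+	 */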
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 &&
+ tp->pci_lat_timer < 64) {
+ tp->pci_lat_timer = 64;
+
+ cacheline_sz_reg = ((tp->pci_cacheline_sz & 0xff) << 0);
+ cacheline_sz_reg |= ((tp->pci_lat_timer & 0xff) << 8);
+ cacheline_sz_reg |= ((tp->pci_hdr_type & 0xff) << 16);
+ cacheline_sz_reg |= ((tp->pci_bist & 0xff) << 24);
+
+ pci_write_config_dword(tp->pdev, TG3PCI_CACHELINESZ,
+ cacheline_sz_reg);
+ }
+
+ pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE,
+ &pci_state_reg);
+
+ if ((pci_state_reg & PCISTATE_CONV_PCI_MODE) == 0) {
+ tp->tg3_flags |= TG3_FLAG_PCIX_MODE;
+
+ /* If this is a 5700 BX chipset, and we are in PCI-X
+ * mode, enable register write workaround.
+ *
+ * The workaround is to use indirect register accesses
+ * for all chip writes not to mailbox registers.
+ */
+ if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX) {
+ u32 pm_reg;
+ u16 pci_cmd;
+
+ tp->tg3_flags |= TG3_FLAG_PCIX_TARGET_HWBUG;
+
+			/* The chip can have its power management PCI config
+ * space registers clobbered due to this bug.
+ * So explicitly force the chip into D0 here.
+ */
+ pci_read_config_dword(tp->pdev, TG3PCI_PM_CTRL_STAT,
+ &pm_reg);
+ pm_reg &= ~PCI_PM_CTRL_STATE_MASK;
+ pm_reg |= PCI_PM_CTRL_PME_ENABLE | 0 /* D0 */;
+ pci_write_config_dword(tp->pdev, TG3PCI_PM_CTRL_STAT,
+ pm_reg);
+
+ /* Also, force SERR#/PERR# in PCI command. */
+ pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd);
+ pci_cmd |= PCI_COMMAND_PARITY | PCI_COMMAND_SERR;
+ pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd);
+ }
+ }
+ if ((pci_state_reg & PCISTATE_BUS_SPEED_HIGH) != 0)
+ tp->tg3_flags |= TG3_FLAG_PCI_HIGH_SPEED;
+ if ((pci_state_reg & PCISTATE_BUS_32BIT) != 0)
+ tp->tg3_flags |= TG3_FLAG_PCI_32BIT;
+
+ /* Chip-specific fixup from Broadcom driver */
+ if ((tp->pci_chip_rev_id == CHIPREV_ID_5704_A0) &&
+ (!(pci_state_reg & PCISTATE_RETRY_SAME_DMA))) {
+ pci_state_reg |= PCISTATE_RETRY_SAME_DMA;
+ pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, pci_state_reg);
+ }
+
+ /* Force the chip into D0. */
+ err = tg3_set_power_state(tp, 0);
+ if (err) {
+ printk(KERN_ERR PFX "(%s) transition to D0 failed\n",
+ tp->pdev->slot_name);
+ return err;
+ }
+
+ /* 5700 B0 chips do not support checksumming correctly due
+ * to hardware bugs.
+ */
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5700_B0)
+ tp->tg3_flags |= TG3_FLAG_BROKEN_CHECKSUMS;
+
+ /* Regardless of whether checksums work or not, we configure
+ * the StrongARM chips to not compute the pseudo header checksums
+ * in either direction. Because of the way Linux checksum support
+ * works we do not need the chips to do this, and taking the load
+ * off of the TX/RX onboard StrongARM cpus means that they will not be
+ * the bottleneck. Whoever wrote Broadcom's driver did not
+ * understand the situation at all. He could have bothered
+ * to read Jes's Acenic driver because the logic (and this part of
+ * the Tigon2 hardware/firmware) is pretty much identical.
+ */
+ tp->tg3_flags |= TG3_FLAG_NO_TX_PSEUDO_CSUM;
+ tp->tg3_flags |= TG3_FLAG_NO_RX_PSEUDO_CSUM;
+
+ /* Derive initial jumbo mode from MTU assigned in
+ * ether_setup() via the alloc_etherdev() call
+ */
+ if (tp->dev->mtu > ETH_DATA_LEN)
+ tp->tg3_flags |= TG3_FLAG_JUMBO_ENABLE;
+
+ /* Determine WakeOnLan speed to use. */
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B2) {
+ tp->tg3_flags &= ~(TG3_FLAG_WOL_SPEED_100MB);
+ } else {
+ tp->tg3_flags |= TG3_FLAG_WOL_SPEED_100MB;
+ }
+
+ /* Only 5701 and later support tagged irq status mode.
+ *
+	 * However, since we are using NAPI, avoid tagged irq status
+ * because the interrupt condition is more difficult to
+ * fully clear in that mode.
+ */
+ tp->coalesce_mode = 0;
+
+ if (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_AX &&
+ GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_BX)
+ tp->coalesce_mode |= HOSTCC_MODE_32BYTE;
+
+ /* Initialize MAC MI mode, polling disabled. */
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+
+ /* Initialize data/descriptor byte/word swapping. */
+ tw32(GRC_MODE, tp->grc_mode);
+
+ tg3_switch_clocks(tp);
+
+ /* Clear this out for sanity. */
+ tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0);
+
+ pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE,
+ &pci_state_reg);
+ if ((pci_state_reg & PCISTATE_CONV_PCI_MODE) == 0 &&
+ (tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) == 0) {
+ u32 chiprevid = GET_CHIP_REV_ID(tp->misc_host_ctrl);
+
+ if (chiprevid == CHIPREV_ID_5701_A0 ||
+ chiprevid == CHIPREV_ID_5701_B0 ||
+ chiprevid == CHIPREV_ID_5701_B2 ||
+ chiprevid == CHIPREV_ID_5701_B5) {
+ unsigned long sram_base;
+
+ /* Write some dummy words into the SRAM status block
+ * area, see if it reads back correctly. If the return
+ * value is bad, force enable the PCIX workaround.
+ */
+ sram_base = tp->regs + NIC_SRAM_WIN_BASE + NIC_SRAM_STATS_BLK;
+
+ writel(0x00000000, sram_base);
+ writel(0x00000000, sram_base + 4);
+ writel(0xffffffff, sram_base + 4);
+ if (readl(sram_base) != 0x00000000)
+ tp->tg3_flags |= TG3_FLAG_PCIX_TARGET_HWBUG;
+ }
+ }
+
+ udelay(50);
+ tg3_nvram_init(tp);
+
+ /* Determine if TX descriptors will reside in
+ * main memory or in the chip SRAM.
+ */
+ if (tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG)
+ tp->tg3_flags |= TG3_FLAG_HOST_TXDS;
+
+ /* Quick sanity check. Make sure we see an expected
+ * value here.
+ */
+ grc_misc_cfg = tr32(GRC_MISC_CFG);
+ grc_misc_cfg &= GRC_MISC_CFG_BOARD_ID_MASK;
+ if (grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5700 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5701 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5702FE &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5703 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5703S &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5704 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5704_A2 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5704_X &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_AC91002A1) {
+ printk(KERN_ERR PFX "(%s) unknown board id 0x%08X\n",
+ tp->pdev->slot_name, grc_misc_cfg);
+ return -ENODEV;
+ }
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 &&
+ grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5704CIOBE) {
+ tp->tg3_flags |= TG3_FLAG_SPLIT_MODE;
+ tp->split_mode_max_reqs = SPLIT_MODE_5704_MAX_REQ;
+ }
+
+ /* ROFL, you should see Broadcom's driver code implementing
+ * this, stuff like "if (a || b)" where a and b are always
+ * mutually exclusive. DaveM finds like 6 bugs today, hello!
+ */
+ if (grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5702FE)
+ tp->tg3_flags |= TG3_FLAG_10_100_ONLY;
+
+ err = tg3_phy_probe(tp);
+ if (err) {
+ printk(KERN_ERR PFX "(%s) phy probe failed, err %d\n",
+ tp->pdev->slot_name, err);
+ /* ... but do not return immediately ... */
+ }
+
+ tg3_read_partno(tp);
+
+ if (tp->phy_id == PHY_ID_SERDES) {
+ tp->tg3_flags &= ~TG3_FLAG_USE_MI_INTERRUPT;
+
+ /* And override led_mode in case Dell ever makes
+ * a fibre board.
+ */
+ tp->led_mode = led_mode_three_link;
+ } else {
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700)
+ tp->tg3_flags |= TG3_FLAG_USE_MI_INTERRUPT;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_USE_MI_INTERRUPT;
+ }
+
+ /* 5700 {AX,BX} chips have a broken status block link
+ * change bit implementation, so we must use the
+ * status register in those cases.
+ */
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700)
+ tp->tg3_flags |= TG3_FLAG_USE_LINKCHG_REG;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_USE_LINKCHG_REG;
+
+	/* The led_mode is set during tg3_phy_probe; here we might
+ * have to force the link status polling mechanism based
+ * upon subsystem IDs.
+ */
+ if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
+ tp->phy_id != PHY_ID_SERDES) {
+ tp->tg3_flags |= (TG3_FLAG_USE_MI_INTERRUPT |
+ TG3_FLAG_USE_LINKCHG_REG);
+ }
+
+ /* For all SERDES we poll the MAC status register. */
+ if (tp->phy_id == PHY_ID_SERDES)
+ tp->tg3_flags |= TG3_FLAG_POLL_SERDES;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES;
+
+ /* 5700 BX chips need to have their TX producer index mailboxes
+ * written twice to workaround a bug.
+ */
+ if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX)
+ tp->tg3_flags |= TG3_FLAG_TXD_MBOX_HWBUG;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_TXD_MBOX_HWBUG;
+
+ /* 5700 chips can get confused if TX buffers straddle the
+ * 4GB address boundary in some cases.
+ */
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700) {
+ /* ROFL! Latest Broadcom driver disables NETIF_F_HIGHDMA
+ * in this case instead of fixing their workaround code.
+ *
+ * Like, hey, there is this skb_copy() thing guys,
+ * use it. Oh I can't stop laughing...
+ */
+ tp->dev->hard_start_xmit = tg3_start_xmit_4gbug;
+ } else {
+ tp->dev->hard_start_xmit = tg3_start_xmit;
+ }
+
+ tp->rx_offset = 2;
+
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 &&
+	    (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0)
+		printk(KERN_WARNING PFX "This card may not support unaligned receive pointers.\n");
+	/* tp->rx_offset = 0; */
+
+ /* By default, disable wake-on-lan. User can change this
+ * using ETHTOOL_SWOL.
+ */
+ tp->tg3_flags &= ~TG3_FLAG_WOL_ENABLE;
+
+ return err;
+}
+
+static int __devinit tg3_get_device_address(struct tg3 *tp)
+{
+ struct net_device *dev = tp->dev;
+ u32 hi, lo, mac_offset;
+
+ if (PCI_FUNC(tp->pdev->devfn) == 0)
+ mac_offset = 0x7c;
+ else
+ mac_offset = 0xcc;
+
+ /* First try to get it from MAC address mailbox. */
+ tg3_read_mem(tp, NIC_SRAM_MAC_ADDR_HIGH_MBOX, &hi);
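+	/* 0x484b is ASCII "HK"; presumably the firmware's signature
+	 * marking a valid MAC address in the mailbox.
+	 */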
+ if ((hi >> 16) == 0x484b) {
+ dev->dev_addr[0] = (hi >> 8) & 0xff;
+ dev->dev_addr[1] = (hi >> 0) & 0xff;
+
+ tg3_read_mem(tp, NIC_SRAM_MAC_ADDR_LOW_MBOX, &lo);
+ dev->dev_addr[2] = (lo >> 24) & 0xff;
+ dev->dev_addr[3] = (lo >> 16) & 0xff;
+ dev->dev_addr[4] = (lo >> 8) & 0xff;
+ dev->dev_addr[5] = (lo >> 0) & 0xff;
+ }
+ /* Next, try NVRAM. */
+ else if (!tg3_nvram_read(tp, mac_offset + 0, &hi) &&
+ !tg3_nvram_read(tp, mac_offset + 4, &lo)) {
+ dev->dev_addr[0] = ((hi >> 16) & 0xff);
+ dev->dev_addr[1] = ((hi >> 24) & 0xff);
+ dev->dev_addr[2] = ((lo >> 0) & 0xff);
+ dev->dev_addr[3] = ((lo >> 8) & 0xff);
+ dev->dev_addr[4] = ((lo >> 16) & 0xff);
+ dev->dev_addr[5] = ((lo >> 24) & 0xff);
+ }
+ /* Finally just fetch it out of the MAC control regs. */
+ else {
+ hi = tr32(MAC_ADDR_0_HIGH);
+ lo = tr32(MAC_ADDR_0_LOW);
+
+ dev->dev_addr[5] = lo & 0xff;
+ dev->dev_addr[4] = (lo >> 8) & 0xff;
+ dev->dev_addr[3] = (lo >> 16) & 0xff;
+ dev->dev_addr[2] = (lo >> 24) & 0xff;
+ dev->dev_addr[1] = hi & 0xff;
+ dev->dev_addr[0] = (hi >> 8) & 0xff;
+ }
+
+ if (!is_valid_ether_addr(&dev->dev_addr[0]))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __devinit tg3_do_test_dma(struct tg3 *tp, u32 *buf, dma_addr_t buf_dma, int size, int to_device)
+{
+ struct tg3_internal_buffer_desc test_desc;
+ u32 sram_dma_descs;
+ int i, ret;
+
+ sram_dma_descs = NIC_SRAM_DMA_DESC_POOL_BASE;
+
+ tw32(FTQ_RCVBD_COMP_FIFO_ENQDEQ, 0);
+ tw32(FTQ_RCVDATA_COMP_FIFO_ENQDEQ, 0);
+ tw32(RDMAC_STATUS, 0);
+ tw32(WDMAC_STATUS, 0);
+
+ tw32(BUFMGR_MODE, 0);
+ tw32(FTQ_RESET, 0);
+
+ /* pci_alloc_consistent gives only non-DAC addresses */
+ test_desc.addr_hi = 0;
+ test_desc.addr_lo = buf_dma & 0xffffffff;
+ test_desc.nic_mbuf = 0x00002100;
+ test_desc.len = size;
+ if (to_device) {
+ test_desc.cqid_sqid = (13 << 8) | 2;
+ tw32(RDMAC_MODE, RDMAC_MODE_RESET);
+ tr32(RDMAC_MODE);
+ udelay(40);
+
+ tw32(RDMAC_MODE, RDMAC_MODE_ENABLE);
+ tr32(RDMAC_MODE);
+ udelay(40);
+ } else {
+ test_desc.cqid_sqid = (16 << 8) | 7;
+ tw32(WDMAC_MODE, WDMAC_MODE_RESET);
+ tr32(WDMAC_MODE);
+ udelay(40);
+
+ tw32(WDMAC_MODE, WDMAC_MODE_ENABLE);
+ tr32(WDMAC_MODE);
+ udelay(40);
+ }
+ test_desc.flags = 0x00000004;
+
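+	/* Copy the descriptor into NIC SRAM a word at a time through the
+	 * PCI memory window, then hand its SRAM address to the DMA
+	 * engine's FTQ to kick off the transfer.
+	 */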
+ for (i = 0; i < (sizeof(test_desc) / sizeof(u32)); i++) {
+ u32 val;
+
+ val = *(((u32 *)&test_desc) + i);
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR,
+ sram_dma_descs + (i * sizeof(u32)));
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val);
+ }
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0);
+
+ if (to_device) {
+ tw32(FTQ_DMA_HIGH_READ_FIFO_ENQDEQ, sram_dma_descs);
+ } else {
+ tw32(FTQ_DMA_HIGH_WRITE_FIFO_ENQDEQ, sram_dma_descs);
+ }
+
+ ret = -ENODEV;
+ for (i = 0; i < 40; i++) {
+ u32 val;
+
+ if (to_device)
+ val = tr32(FTQ_RCVBD_COMP_FIFO_ENQDEQ);
+ else
+ val = tr32(FTQ_RCVDATA_COMP_FIFO_ENQDEQ);
+ if ((val & 0xffff) == sram_dma_descs) {
+ ret = 0;
+ break;
+ }
+
+ udelay(100);
+ }
+
+ return ret;
+}
+
+#define TEST_BUFFER_SIZE 0x400
+
+static int __devinit tg3_test_dma(struct tg3 *tp)
+{
+ dma_addr_t buf_dma;
+ u32 *buf;
+ int ret;
+
+ buf = pci_alloc_consistent(tp->pdev, TEST_BUFFER_SIZE, &buf_dma);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto out_nofree;
+ }
+
+ tw32(TG3PCI_CLOCK_CTRL, 0);
+
+ if ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) == 0) {
+ tp->dma_rwctrl =
+ (0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) |
+ (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT) |
+ (0x7 << DMA_RWCTRL_WRITE_WATER_SHIFT) |
+ (0x7 << DMA_RWCTRL_READ_WATER_SHIFT) |
+ (0x0f << DMA_RWCTRL_MIN_DMA_SHIFT);
+ } else {
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704)
+ tp->dma_rwctrl =
+ (0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) |
+ (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT) |
+ (0x3 << DMA_RWCTRL_WRITE_WATER_SHIFT) |
+ (0x7 << DMA_RWCTRL_READ_WATER_SHIFT) |
+ (0x00 << DMA_RWCTRL_MIN_DMA_SHIFT);
+ else
+ tp->dma_rwctrl =
+ (0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) |
+ (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT) |
+ (0x3 << DMA_RWCTRL_WRITE_WATER_SHIFT) |
+ (0x3 << DMA_RWCTRL_READ_WATER_SHIFT) |
+ (0x0f << DMA_RWCTRL_MIN_DMA_SHIFT);
+
+ /* Wheee, some more chip bugs... */
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5703_A1 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5703_A2 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5703_A3 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5704_A0)
+ tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA;
+ }
+
+	/* We don't do this on x86 because it seems to hurt performance.
+ * It does help things on other platforms though.
+ */
+#ifndef CONFIG_X86
+ {
+ u8 byte;
+ int cacheline_size;
+ pci_read_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, &byte);
+
+ if (byte == 0)
+ cacheline_size = 1024;
+ else
+ cacheline_size = (int) byte * 4;
+
+ tp->dma_rwctrl &= ~(DMA_RWCTRL_READ_BNDRY_MASK |
+ DMA_RWCTRL_WRITE_BNDRY_MASK);
+
+ switch (cacheline_size) {
+ case 16:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_16 |
+ DMA_RWCTRL_WRITE_BNDRY_16);
+ break;
+
+ case 32:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_32 |
+ DMA_RWCTRL_WRITE_BNDRY_32);
+ break;
+
+ case 64:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_64 |
+ DMA_RWCTRL_WRITE_BNDRY_64);
+ break;
+
+ case 128:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_128 |
+ DMA_RWCTRL_WRITE_BNDRY_128);
+ break;
+
+ case 256:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_256 |
+ DMA_RWCTRL_WRITE_BNDRY_256);
+ break;
+
+ case 512:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_512 |
+ DMA_RWCTRL_WRITE_BNDRY_512);
+ break;
+
+ case 1024:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_1024 |
+ DMA_RWCTRL_WRITE_BNDRY_1024);
+ break;
+		}
+ }
+#endif
+
+ /* Remove this if it causes problems for some boards. */
+ tp->dma_rwctrl |= DMA_RWCTRL_USE_MEM_READ_MULT;
+
+ tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 &&
+ GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701)
+ return 0;
+
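+	/* DMA a known pattern to the chip and back, verifying each word.
+	 * On a mismatch with write boundaries disabled, enable a 16-byte
+	 * write boundary and retry; any other failure is fatal.
+	 */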
+ ret = 0;
+ while (1) {
+ u32 *p, i;
+
+ p = buf;
+ for (i = 0; i < TEST_BUFFER_SIZE / sizeof(u32); i++)
+ p[i] = i;
+
+ /* Send the buffer to the chip. */
+ ret = tg3_do_test_dma(tp, buf, buf_dma, TEST_BUFFER_SIZE, 1);
+ if (ret)
+ break;
+
+ p = buf;
+ for (i = 0; i < TEST_BUFFER_SIZE / sizeof(u32); i++)
+ p[i] = 0;
+
+ /* Now read it back. */
+ ret = tg3_do_test_dma(tp, buf, buf_dma, TEST_BUFFER_SIZE, 0);
+ if (ret)
+ break;
+
+ /* Verify it. */
+ p = buf;
+ for (i = 0; i < TEST_BUFFER_SIZE / sizeof(u32); i++) {
+ if (p[i] == i)
+ continue;
+
+ if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) ==
+ DMA_RWCTRL_WRITE_BNDRY_DISAB) {
+ tp->dma_rwctrl |= DMA_RWCTRL_WRITE_BNDRY_16;
+ tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+ break;
+ } else {
+ ret = -ENODEV;
+ goto out;
+ }
+ }
+
+ if (i == (TEST_BUFFER_SIZE / sizeof(u32))) {
+ /* Success. */
+ ret = 0;
+ break;
+ }
+ }
+
+out:
+ pci_free_consistent(tp->pdev, TEST_BUFFER_SIZE, buf, buf_dma);
+out_nofree:
+ return ret;
+}
+
+static void __devinit tg3_init_link_config(struct tg3 *tp)
+{
+ tp->link_config.advertising =
+ (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
+ ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
+ ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full |
+ ADVERTISED_Autoneg | ADVERTISED_MII);
+ tp->link_config.speed = SPEED_INVALID;
+ tp->link_config.duplex = DUPLEX_INVALID;
+ tp->link_config.autoneg = AUTONEG_ENABLE;
+ netif_carrier_off(tp->dev);
+ tp->link_config.active_speed = SPEED_INVALID;
+ tp->link_config.active_duplex = DUPLEX_INVALID;
+ tp->link_config.phy_is_low_power = 0;
+ tp->link_config.orig_speed = SPEED_INVALID;
+ tp->link_config.orig_duplex = DUPLEX_INVALID;
+ tp->link_config.orig_autoneg = AUTONEG_INVALID;
+}
+
+static void __devinit tg3_init_bufmgr_config(struct tg3 *tp)
+{
+ tp->bufmgr_config.mbuf_read_dma_low_water =
+ DEFAULT_MB_RDMA_LOW_WATER;
+ tp->bufmgr_config.mbuf_mac_rx_low_water =
+ DEFAULT_MB_MACRX_LOW_WATER;
+ tp->bufmgr_config.mbuf_high_water =
+ DEFAULT_MB_HIGH_WATER;
+
+ tp->bufmgr_config.mbuf_read_dma_low_water_jumbo =
+ DEFAULT_MB_RDMA_LOW_WATER_JUMBO;
+ tp->bufmgr_config.mbuf_mac_rx_low_water_jumbo =
+ DEFAULT_MB_MACRX_LOW_WATER_JUMBO;
+ tp->bufmgr_config.mbuf_high_water_jumbo =
+ DEFAULT_MB_HIGH_WATER_JUMBO;
+
+ tp->bufmgr_config.dma_low_water = DEFAULT_DMA_LOW_WATER;
+ tp->bufmgr_config.dma_high_water = DEFAULT_DMA_HIGH_WATER;
+}
+
+static char * __devinit tg3_phy_string(struct tg3 *tp)
+{
+ switch (tp->phy_id & PHY_ID_MASK) {
+ case PHY_ID_BCM5400: return "5400";
+ case PHY_ID_BCM5401: return "5401";
+ case PHY_ID_BCM5411: return "5411";
+ case PHY_ID_BCM5701: return "5701";
+ case PHY_ID_BCM5703: return "5703";
+ case PHY_ID_BCM5704: return "5704";
+ case PHY_ID_BCM8002: return "8002";
+ case PHY_ID_SERDES: return "serdes";
+ default: return "unknown";
+	}
+}
+
+static int __devinit tg3_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ static int tg3_version_printed = 0;
+ unsigned long tg3reg_base, tg3reg_len;
+ struct net_device *dev;
+ struct tg3 *tp;
+ int i, err, pci_using_dac, pm_cap;
+
+ if (tg3_version_printed++ == 0)
+ printk(KERN_INFO "%s", version);
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ printk(KERN_ERR PFX "Cannot enable PCI device, "
+ "aborting.\n");
+ return err;
+ }
+
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+ printk(KERN_ERR PFX "Cannot find proper PCI device "
+ "base address, aborting.\n");
+ err = -ENODEV;
+ goto err_out_disable_pdev;
+ }
+
+ err = pci_request_regions(pdev, DRV_MODULE_NAME);
+ if (err) {
+ printk(KERN_ERR PFX "Cannot obtain PCI resources, "
+ "aborting.\n");
+ goto err_out_disable_pdev;
+ }
+
+ pci_set_master(pdev);
+
+ /* Find power-management capability. */
+ pm_cap = pci_find_capability(pdev, PCI_CAP_ID_PM);
+ if (pm_cap == 0) {
+ printk(KERN_ERR PFX "Cannot find PowerManagement capability, "
+ "aborting.\n");
+ goto err_out_free_res;
+ }
+
+ /* Configure DMA attributes. */
+	if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffffULL)) {
+ pci_using_dac = 1;
+ } else {
+ err = pci_set_dma_mask(pdev, (u64) 0xffffffff);
+ if (err) {
+ printk(KERN_ERR PFX "No usable DMA configuration, "
+ "aborting.\n");
+ goto err_out_free_res;
+ }
+ pci_using_dac = 0;
+ }
+
+ tg3reg_base = pci_resource_start(pdev, 0);
+ tg3reg_len = pci_resource_len(pdev, 0);
+
+ dev = alloc_etherdev(sizeof(*tp));
+ if (!dev) {
+ printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n");
+ err = -ENOMEM;
+ goto err_out_free_res;
+ }
+
+ SET_MODULE_OWNER(dev);
+
+ if (pci_using_dac)
+ dev->features |= NETIF_F_HIGHDMA;
+#if TG3_VLAN_TAG_USED
+ dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+ dev->vlan_rx_register = tg3_vlan_rx_register;
+ dev->vlan_rx_kill_vid = tg3_vlan_rx_kill_vid;
+#endif
+#if TG3_DO_TSO != 0
+ dev->features |= NETIF_F_TSO;
+#endif
+
+ tp = dev->priv;
+ tp->pdev = pdev;
+ tp->dev = dev;
+ tp->pm_cap = pm_cap;
+ tp->mac_mode = TG3_DEF_MAC_MODE;
+ tp->rx_mode = TG3_DEF_RX_MODE;
+ tp->tx_mode = TG3_DEF_TX_MODE;
+ tp->mi_mode = MAC_MI_MODE_BASE;
+ if (tg3_debug > 0)
+ tp->msg_enable = tg3_debug;
+ else
+ tp->msg_enable = TG3_DEF_MSG_ENABLE;
+
+ /* The word/byte swap controls here control register access byte
+ * swapping. DMA data byte swapping is controlled in the GRC_MODE
+ * setting below.
+ */
+ tp->misc_host_ctrl =
+ MISC_HOST_CTRL_MASK_PCI_INT |
+ MISC_HOST_CTRL_WORD_SWAP |
+ MISC_HOST_CTRL_INDIR_ACCESS |
+ MISC_HOST_CTRL_PCISTATE_RW;
+
+ /* The NONFRM (non-frame) byte/word swap controls take effect
+ * on descriptor entries, anything which isn't packet data.
+ *
+ * The StrongARM chips on the board (one for tx, one for rx)
+ * are running in big-endian mode.
+ */
+ tp->grc_mode = (GRC_MODE_WSWAP_DATA | GRC_MODE_BSWAP_DATA |
+ GRC_MODE_WSWAP_NONFRM_DATA);
+#ifdef __BIG_ENDIAN
+ tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA;
+#endif
+ spin_lock_init(&tp->lock);
+ spin_lock_init(&tp->tx_lock);
+ spin_lock_init(&tp->indirect_lock);
+
+ tp->regs = (unsigned long) ioremap(tg3reg_base, tg3reg_len);
+ if (tp->regs == 0UL) {
+ printk(KERN_ERR PFX "Cannot map device registers, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_out_free_dev;
+ }
+
+ tg3_init_link_config(tp);
+
+ tg3_init_bufmgr_config(tp);
+
+ tp->rx_pending = TG3_DEF_RX_RING_PENDING;
+ tp->rx_jumbo_pending = TG3_DEF_RX_JUMBO_RING_PENDING;
+ tp->tx_pending = TG3_DEF_TX_RING_PENDING;
+
+ dev->open = tg3_open;
+ dev->stop = tg3_close;
+ dev->get_stats = tg3_get_stats;
+ dev->set_multicast_list = tg3_set_rx_mode;
+ dev->set_mac_address = tg3_set_mac_addr;
+ dev->do_ioctl = tg3_ioctl;
+ dev->tx_timeout = tg3_tx_timeout;
+#ifdef NAPI
+ dev->poll = tg3_poll;
+ dev->weight = 64;
+#endif
+ dev->watchdog_timeo = TG3_TX_TIMEOUT;
+ dev->change_mtu = tg3_change_mtu;
+ dev->irq = pdev->irq;
+
+ err = tg3_get_invariants(tp);
+ if (err) {
+ printk(KERN_ERR PFX "Problem fetching invariants of chip, "
+ "aborting.\n");
+ goto err_out_iounmap;
+ }
+
+ err = tg3_get_device_address(tp);
+ if (err) {
+ printk(KERN_ERR PFX "Could not obtain valid ethernet address, "
+ "aborting.\n");
+ goto err_out_iounmap;
+ }
+
+ err = tg3_test_dma(tp);
+ if (err) {
+ printk(KERN_ERR PFX "DMA engine test failed, aborting.\n");
+ goto err_out_iounmap;
+ }
+
+ /* Tigon3 can do ipv4 only... and some chips have buggy
+ * checksumming.
+ */
+ if ((tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) == 0) {
+ dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+ tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS;
+ } else
+ tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS;
+
+ err = register_netdev(dev);
+ if (err) {
+ printk(KERN_ERR PFX "Cannot register net device, "
+ "aborting.\n");
+ goto err_out_iounmap;
+ }
+
+ pci_set_drvdata(pdev, dev);
+
+	/* Now that we have fully set up the chip, save away a snapshot
+ * of the PCI config space. We need to restore this after
+ * GRC_MISC_CFG core clock resets and some resume events.
+ */
+ pci_save_state(tp->pdev, tp->pci_cfg_state);
+
+ printk(KERN_INFO "%s: Tigon3 [partno(%s) rev %04x PHY(%s)] (PCI%s:%s:%s) %sBaseT Ethernet ",
+ dev->name,
+ tp->board_part_number,
+ tp->pci_chip_rev_id,
+ tg3_phy_string(tp),
+ ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) ? "X" : ""),
+ ((tp->tg3_flags & TG3_FLAG_PCI_HIGH_SPEED) ?
+ ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) ? "133MHz" : "66MHz") :
+ ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) ? "100MHz" : "33MHz")),
+ ((tp->tg3_flags & TG3_FLAG_PCI_32BIT) ? "32-bit" : "64-bit"),
+ (tp->tg3_flags & TG3_FLAG_10_100_ONLY) ? "10/100" : "10/100/1000");
+
+ for (i = 0; i < 6; i++)
+ printk("%2.2x%c", dev->dev_addr[i],
+ i == 5 ? '\n' : ':');
+
+ return 0;
+
+err_out_iounmap:
+ iounmap((void *) tp->regs);
+
+err_out_free_dev:
+ kfree(dev);
+
+err_out_free_res:
+ pci_release_regions(pdev);
+
+err_out_disable_pdev:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ return err;
+}
+
+static void __devexit tg3_remove_one(struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ if (dev) {
+ unregister_netdev(dev);
+ iounmap((void *) ((struct tg3 *)(dev->priv))->regs);
+ kfree(dev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ }
+}
+
+static int tg3_suspend(struct pci_dev *pdev, u32 state)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct tg3 *tp = dev->priv;
+ int err;
+
+ if (!netif_running(dev))
+ return 0;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+ tg3_disable_ints(tp);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ netif_device_detach(dev);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+ tg3_halt(tp);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ err = tg3_set_power_state(tp, state);
+ if (err) {
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ netif_device_attach(dev);
+ }
+
+ return err;
+}
+
+static int tg3_resume(struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct tg3 *tp = dev->priv;
+ int err;
+
+ if (!netif_running(dev))
+ return 0;
+
+ err = tg3_set_power_state(tp, 0);
+ if (err)
+ return err;
+
+ netif_device_attach(dev);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+ tg3_enable_ints(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+}
+
+static struct pci_driver tg3_driver = {
+ .name = DRV_MODULE_NAME,
+ .id_table = tg3_pci_tbl,
+ .probe = tg3_init_one,
+ .remove = __devexit_p(tg3_remove_one),
+ .suspend = tg3_suspend,
+ .resume = tg3_resume
+};
+
+static int __init tg3_init(void)
+{
+ return pci_module_init(&tg3_driver);
+}
+
+static void __exit tg3_cleanup(void)
+{
+ pci_unregister_driver(&tg3_driver);
+}
+
+module_init(tg3_init);
+module_exit(tg3_cleanup);
diff --git a/xen/drivers/net/tg3.h b/xen/drivers/net/tg3.h
new file mode 100644
index 0000000000..d816322d98
--- /dev/null
+++ b/xen/drivers/net/tg3.h
@@ -0,0 +1,1893 @@
+/* $Id: tg3.h,v 1.37.2.32 2002/03/11 12:18:18 davem Exp $
+ * tg3.h: Definitions for Broadcom Tigon3 ethernet driver.
+ *
+ * Copyright (C) 2001, 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com)
+ */
+
+#ifndef _T3_H
+#define _T3_H
+
+#define TG3_64BIT_REG_HIGH 0x00UL
+#define TG3_64BIT_REG_LOW 0x04UL
+
+/* Descriptor block info. */
+#define TG3_BDINFO_HOST_ADDR 0x0UL /* 64-bit */
+#define TG3_BDINFO_MAXLEN_FLAGS 0x8UL /* 32-bit */
+#define BDINFO_FLAGS_USE_EXT_RECV 0x00000001 /* ext rx_buffer_desc */
+#define BDINFO_FLAGS_DISABLED 0x00000002
+#define BDINFO_FLAGS_MAXLEN_MASK 0xffff0000
+#define BDINFO_FLAGS_MAXLEN_SHIFT 16
+#define TG3_BDINFO_NIC_ADDR 0xcUL /* 32-bit */
+#define TG3_BDINFO_SIZE 0x10UL
+
+#define RX_COPY_THRESHOLD		0	/* 256 */
+
+#define RX_STD_MAX_SIZE 1536
+#define RX_JUMBO_MAX_SIZE 0xdeadbeef /* XXX */
+
+/* First 256 bytes are a mirror of PCI config space. */
+#define TG3PCI_VENDOR 0x00000000
+#define TG3PCI_VENDOR_BROADCOM 0x14e4
+#define TG3PCI_DEVICE 0x00000002
+#define TG3PCI_DEVICE_TIGON3_1 0x1644 /* BCM5700 */
+#define TG3PCI_DEVICE_TIGON3_2 0x1645 /* BCM5701 */
+#define TG3PCI_DEVICE_TIGON3_3 0x1646 /* BCM5702 */
+#define TG3PCI_DEVICE_TIGON3_4 0x1647 /* BCM5703 */
+#define TG3PCI_COMMAND 0x00000004
+#define TG3PCI_STATUS 0x00000006
+#define TG3PCI_CCREVID 0x00000008
+#define TG3PCI_CACHELINESZ 0x0000000c
+#define TG3PCI_LATTIMER 0x0000000d
+#define TG3PCI_HEADERTYPE 0x0000000e
+#define TG3PCI_BIST 0x0000000f
+#define TG3PCI_BASE0_LOW 0x00000010
+#define TG3PCI_BASE0_HIGH 0x00000014
+/* 0x18 --> 0x2c unused */
+#define TG3PCI_SUBSYSVENID 0x0000002c
+#define TG3PCI_SUBSYSID 0x0000002e
+#define TG3PCI_ROMADDR 0x00000030
+#define TG3PCI_CAPLIST 0x00000034
+/* 0x35 --> 0x3c unused */
+#define TG3PCI_IRQ_LINE 0x0000003c
+#define TG3PCI_IRQ_PIN 0x0000003d
+#define TG3PCI_MIN_GNT 0x0000003e
+#define TG3PCI_MAX_LAT 0x0000003f
+#define TG3PCI_X_CAPS 0x00000040
+#define PCIX_CAPS_RELAXED_ORDERING 0x00020000
+#define PCIX_CAPS_SPLIT_MASK 0x00700000
+#define PCIX_CAPS_SPLIT_SHIFT 20
+#define PCIX_CAPS_BURST_MASK 0x000c0000
+#define PCIX_CAPS_BURST_SHIFT 18
+#define PCIX_CAPS_MAX_BURST_5704 2
+#define TG3PCI_PM_CAP_PTR 0x00000041
+#define TG3PCI_X_COMMAND 0x00000042
+#define TG3PCI_X_STATUS 0x00000044
+#define TG3PCI_PM_CAP_ID 0x00000048
+#define TG3PCI_VPD_CAP_PTR 0x00000049
+#define TG3PCI_PM_CAPS 0x0000004a
+#define TG3PCI_PM_CTRL_STAT 0x0000004c
+#define TG3PCI_BR_SUPP_EXT 0x0000004e
+#define TG3PCI_PM_DATA 0x0000004f
+#define TG3PCI_VPD_CAP_ID 0x00000050
+#define TG3PCI_MSI_CAP_PTR 0x00000051
+#define TG3PCI_VPD_ADDR_FLAG 0x00000052
+#define VPD_ADDR_FLAG_WRITE 0x00008000
+#define TG3PCI_VPD_DATA 0x00000054
+#define TG3PCI_MSI_CAP_ID 0x00000058
+#define TG3PCI_NXT_CAP_PTR 0x00000059
+#define TG3PCI_MSI_CTRL 0x0000005a
+#define TG3PCI_MSI_ADDR_LOW 0x0000005c
+#define TG3PCI_MSI_ADDR_HIGH 0x00000060
+#define TG3PCI_MSI_DATA 0x00000064
+/* 0x66 --> 0x68 unused */
+#define TG3PCI_MISC_HOST_CTRL 0x00000068
+#define MISC_HOST_CTRL_CLEAR_INT 0x00000001
+#define MISC_HOST_CTRL_MASK_PCI_INT 0x00000002
+#define MISC_HOST_CTRL_BYTE_SWAP 0x00000004
+#define MISC_HOST_CTRL_WORD_SWAP 0x00000008
+#define MISC_HOST_CTRL_PCISTATE_RW 0x00000010
+#define MISC_HOST_CTRL_CLKREG_RW 0x00000020
+#define MISC_HOST_CTRL_REGWORD_SWAP 0x00000040
+#define MISC_HOST_CTRL_INDIR_ACCESS 0x00000080
+#define MISC_HOST_CTRL_IRQ_MASK_MODE 0x00000100
+#define MISC_HOST_CTRL_TAGGED_STATUS 0x00000200
+#define MISC_HOST_CTRL_CHIPREV 0xffff0000
+#define MISC_HOST_CTRL_CHIPREV_SHIFT 16
+#define GET_CHIP_REV_ID(MISC_HOST_CTRL) \
+ (((MISC_HOST_CTRL) & MISC_HOST_CTRL_CHIPREV) >> \
+ MISC_HOST_CTRL_CHIPREV_SHIFT)
+#define CHIPREV_ID_5700_A0 0x7000
+#define CHIPREV_ID_5700_A1 0x7001
+#define CHIPREV_ID_5700_B0 0x7100
+#define CHIPREV_ID_5700_B1 0x7101
+#define CHIPREV_ID_5700_B3 0x7102
+#define CHIPREV_ID_5700_ALTIMA 0x7104
+#define CHIPREV_ID_5700_C0 0x7200
+#define CHIPREV_ID_5701_A0 0x0000
+#define CHIPREV_ID_5701_B0 0x0100
+#define CHIPREV_ID_5701_B2 0x0102
+#define CHIPREV_ID_5701_B5 0x0105
+#define CHIPREV_ID_5703_A0 0x1000
+#define CHIPREV_ID_5703_A1 0x1001
+#define CHIPREV_ID_5703_A2 0x1002
+#define CHIPREV_ID_5703_A3 0x1003
+#define CHIPREV_ID_5704_A0 0x2000
+#define CHIPREV_ID_5704_A1 0x2001
+#define CHIPREV_ID_5704_A2 0x2002
+#define GET_ASIC_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 12)
+#define ASIC_REV_5700 0x07
+#define ASIC_REV_5701 0x00
+#define ASIC_REV_5703 0x01
+#define ASIC_REV_5704 0x02
+#define GET_CHIP_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 8)
+#define CHIPREV_5700_AX 0x70
+#define CHIPREV_5700_BX 0x71
+#define CHIPREV_5700_CX 0x72
+#define CHIPREV_5701_AX 0x00
+#define GET_METAL_REV(CHIP_REV_ID) ((CHIP_REV_ID) & 0xff)
+#define METAL_REV_A0 0x00
+#define METAL_REV_A1 0x01
+#define METAL_REV_B0 0x00
+#define METAL_REV_B1 0x01
+#define METAL_REV_B2 0x02
+#define TG3PCI_DMA_RW_CTRL 0x0000006c
+#define DMA_RWCTRL_MIN_DMA 0x000000ff
+#define DMA_RWCTRL_MIN_DMA_SHIFT 0
+#define DMA_RWCTRL_READ_BNDRY_MASK 0x00000700
+#define DMA_RWCTRL_READ_BNDRY_DISAB 0x00000000
+#define DMA_RWCTRL_READ_BNDRY_16 0x00000100
+#define DMA_RWCTRL_READ_BNDRY_32 0x00000200
+#define DMA_RWCTRL_READ_BNDRY_64 0x00000300
+#define DMA_RWCTRL_READ_BNDRY_128 0x00000400
+#define DMA_RWCTRL_READ_BNDRY_256 0x00000500
+#define DMA_RWCTRL_READ_BNDRY_512 0x00000600
+#define DMA_RWCTRL_READ_BNDRY_1024 0x00000700
+#define DMA_RWCTRL_WRITE_BNDRY_MASK 0x00003800
+#define DMA_RWCTRL_WRITE_BNDRY_DISAB 0x00000000
+#define DMA_RWCTRL_WRITE_BNDRY_16 0x00000800
+#define DMA_RWCTRL_WRITE_BNDRY_32 0x00001000
+#define DMA_RWCTRL_WRITE_BNDRY_64 0x00001800
+#define DMA_RWCTRL_WRITE_BNDRY_128 0x00002000
+#define DMA_RWCTRL_WRITE_BNDRY_256 0x00002800
+#define DMA_RWCTRL_WRITE_BNDRY_512 0x00003000
+#define DMA_RWCTRL_WRITE_BNDRY_1024 0x00003800
+#define DMA_RWCTRL_ONE_DMA 0x00004000
+#define DMA_RWCTRL_READ_WATER 0x00070000
+#define DMA_RWCTRL_READ_WATER_SHIFT 16
+#define DMA_RWCTRL_WRITE_WATER 0x00380000
+#define DMA_RWCTRL_WRITE_WATER_SHIFT 19
+#define DMA_RWCTRL_USE_MEM_READ_MULT 0x00400000
+#define DMA_RWCTRL_ASSERT_ALL_BE 0x00800000
+#define DMA_RWCTRL_PCI_READ_CMD 0x0f000000
+#define DMA_RWCTRL_PCI_READ_CMD_SHIFT 24
+#define DMA_RWCTRL_PCI_WRITE_CMD 0xf0000000
+#define DMA_RWCTRL_PCI_WRITE_CMD_SHIFT 28
+#define TG3PCI_PCISTATE 0x00000070
+#define PCISTATE_FORCE_RESET 0x00000001
+#define PCISTATE_INT_NOT_ACTIVE 0x00000002
+#define PCISTATE_CONV_PCI_MODE 0x00000004
+#define PCISTATE_BUS_SPEED_HIGH 0x00000008
+#define PCISTATE_BUS_32BIT 0x00000010
+#define PCISTATE_ROM_ENABLE 0x00000020
+#define PCISTATE_ROM_RETRY_ENABLE 0x00000040
+#define PCISTATE_FLAT_VIEW 0x00000100
+#define PCISTATE_RETRY_SAME_DMA 0x00002000
+#define TG3PCI_CLOCK_CTRL 0x00000074
+#define CLOCK_CTRL_CORECLK_DISABLE 0x00000200
+#define CLOCK_CTRL_RXCLK_DISABLE 0x00000400
+#define CLOCK_CTRL_TXCLK_DISABLE 0x00000800
+#define CLOCK_CTRL_ALTCLK 0x00001000
+#define CLOCK_CTRL_PWRDOWN_PLL133 0x00008000
+#define CLOCK_CTRL_44MHZ_CORE 0x00040000
+#define CLOCK_CTRL_DELAY_PCI_GRANT 0x80000000
+#define TG3PCI_REG_BASE_ADDR 0x00000078
+#define TG3PCI_MEM_WIN_BASE_ADDR 0x0000007c
+#define TG3PCI_REG_DATA 0x00000080
+#define TG3PCI_MEM_WIN_DATA 0x00000084
+#define TG3PCI_MODE_CTRL 0x00000088
+#define TG3PCI_MISC_CFG 0x0000008c
+#define TG3PCI_MISC_LOCAL_CTRL 0x00000090
+/* 0x94 --> 0x98 unused */
+#define TG3PCI_STD_RING_PROD_IDX 0x00000098 /* 64-bit */
+#define TG3PCI_RCV_RET_RING_CON_IDX 0x000000a0 /* 64-bit */
+#define TG3PCI_SND_PROD_IDX 0x000000a8 /* 64-bit */
+/* 0xb0 --> 0x100 unused */
+
+/* 0x100 --> 0x200 unused */
+
+/* Mailbox registers */
+#define MAILBOX_INTERRUPT_0 0x00000200 /* 64-bit */
+#define MAILBOX_INTERRUPT_1 0x00000208 /* 64-bit */
+#define MAILBOX_INTERRUPT_2 0x00000210 /* 64-bit */
+#define MAILBOX_INTERRUPT_3 0x00000218 /* 64-bit */
+#define MAILBOX_GENERAL_0 0x00000220 /* 64-bit */
+#define MAILBOX_GENERAL_1 0x00000228 /* 64-bit */
+#define MAILBOX_GENERAL_2 0x00000230 /* 64-bit */
+#define MAILBOX_GENERAL_3 0x00000238 /* 64-bit */
+#define MAILBOX_GENERAL_4 0x00000240 /* 64-bit */
+#define MAILBOX_GENERAL_5 0x00000248 /* 64-bit */
+#define MAILBOX_GENERAL_6 0x00000250 /* 64-bit */
+#define MAILBOX_GENERAL_7 0x00000258 /* 64-bit */
+#define MAILBOX_RELOAD_STAT 0x00000260 /* 64-bit */
+#define MAILBOX_RCV_STD_PROD_IDX 0x00000268 /* 64-bit */
+#define MAILBOX_RCV_JUMBO_PROD_IDX 0x00000270 /* 64-bit */
+#define MAILBOX_RCV_MINI_PROD_IDX 0x00000278 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_0 0x00000280 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_1 0x00000288 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_2 0x00000290 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_3 0x00000298 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_4 0x000002a0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_5 0x000002a8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_6 0x000002b0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_7 0x000002b8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_8 0x000002c0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_9 0x000002c8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_10 0x000002d0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_11 0x000002d8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_12 0x000002e0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_13 0x000002e8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_14 0x000002f0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_15 0x000002f8 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_0 0x00000300 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_1 0x00000308 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_2 0x00000310 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_3 0x00000318 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_4 0x00000320 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_5 0x00000328 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_6 0x00000330 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_7 0x00000338 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_8 0x00000340 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_9 0x00000348 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_10 0x00000350 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_11 0x00000358 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_12 0x00000360 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_13 0x00000368 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_14 0x00000370 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_15 0x00000378 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_0 0x00000380 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_1 0x00000388 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_2 0x00000390 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_3 0x00000398 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_4 0x000003a0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_5 0x000003a8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_6 0x000003b0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_7 0x000003b8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_8 0x000003c0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_9 0x000003c8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_10 0x000003d0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_11 0x000003d8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_12 0x000003e0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_13 0x000003e8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_14 0x000003f0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_15 0x000003f8 /* 64-bit */
+
+/* MAC control registers */
+#define MAC_MODE 0x00000400
+#define MAC_MODE_RESET 0x00000001
+#define MAC_MODE_HALF_DUPLEX 0x00000002
+#define MAC_MODE_PORT_MODE_MASK 0x0000000c
+#define MAC_MODE_PORT_MODE_TBI 0x0000000c
+#define MAC_MODE_PORT_MODE_GMII 0x00000008
+#define MAC_MODE_PORT_MODE_MII 0x00000004
+#define MAC_MODE_PORT_MODE_NONE 0x00000000
+#define MAC_MODE_PORT_INT_LPBACK 0x00000010
+#define MAC_MODE_TAGGED_MAC_CTRL 0x00000080
+#define MAC_MODE_TX_BURSTING 0x00000100
+#define MAC_MODE_MAX_DEFER 0x00000200
+#define MAC_MODE_LINK_POLARITY 0x00000400
+#define MAC_MODE_RXSTAT_ENABLE 0x00000800
+#define MAC_MODE_RXSTAT_CLEAR 0x00001000
+#define MAC_MODE_RXSTAT_FLUSH 0x00002000
+#define MAC_MODE_TXSTAT_ENABLE 0x00004000
+#define MAC_MODE_TXSTAT_CLEAR 0x00008000
+#define MAC_MODE_TXSTAT_FLUSH 0x00010000
+#define MAC_MODE_SEND_CONFIGS 0x00020000
+#define MAC_MODE_MAGIC_PKT_ENABLE 0x00040000
+#define MAC_MODE_ACPI_ENABLE 0x00080000
+#define MAC_MODE_MIP_ENABLE 0x00100000
+#define MAC_MODE_TDE_ENABLE 0x00200000
+#define MAC_MODE_RDE_ENABLE 0x00400000
+#define MAC_MODE_FHDE_ENABLE 0x00800000
+#define MAC_STATUS 0x00000404
+#define MAC_STATUS_PCS_SYNCED 0x00000001
+#define MAC_STATUS_SIGNAL_DET 0x00000002
+#define MAC_STATUS_RCVD_CFG 0x00000004
+#define MAC_STATUS_CFG_CHANGED 0x00000008
+#define MAC_STATUS_SYNC_CHANGED 0x00000010
+#define MAC_STATUS_PORT_DEC_ERR 0x00000400
+#define MAC_STATUS_LNKSTATE_CHANGED 0x00001000
+#define MAC_STATUS_MI_COMPLETION 0x00400000
+#define MAC_STATUS_MI_INTERRUPT 0x00800000
+#define MAC_STATUS_AP_ERROR 0x01000000
+#define MAC_STATUS_ODI_ERROR 0x02000000
+#define MAC_STATUS_RXSTAT_OVERRUN 0x04000000
+#define MAC_STATUS_TXSTAT_OVERRUN 0x08000000
+#define MAC_EVENT 0x00000408
+#define MAC_EVENT_PORT_DECODE_ERR 0x00000400
+#define MAC_EVENT_LNKSTATE_CHANGED 0x00001000
+#define MAC_EVENT_MI_COMPLETION 0x00400000
+#define MAC_EVENT_MI_INTERRUPT 0x00800000
+#define MAC_EVENT_AP_ERROR 0x01000000
+#define MAC_EVENT_ODI_ERROR 0x02000000
+#define MAC_EVENT_RXSTAT_OVERRUN 0x04000000
+#define MAC_EVENT_TXSTAT_OVERRUN 0x08000000
+#define MAC_LED_CTRL 0x0000040c
+#define LED_CTRL_LNKLED_OVERRIDE 0x00000001
+#define LED_CTRL_1000MBPS_ON 0x00000002
+#define LED_CTRL_100MBPS_ON 0x00000004
+#define LED_CTRL_10MBPS_ON 0x00000008
+#define LED_CTRL_TRAFFIC_OVERRIDE 0x00000010
+#define LED_CTRL_TRAFFIC_BLINK 0x00000020
+#define LED_CTRL_TRAFFIC_LED 0x00000040
+#define LED_CTRL_1000MBPS_STATUS 0x00000080
+#define LED_CTRL_100MBPS_STATUS 0x00000100
+#define LED_CTRL_10MBPS_STATUS 0x00000200
+#define LED_CTRL_TRAFFIC_STATUS 0x00000400
+#define LED_CTRL_MAC_MODE 0x00000000
+#define LED_CTRL_PHY_MODE_1 0x00000800
+#define LED_CTRL_PHY_MODE_2 0x00001000
+#define LED_CTRL_BLINK_RATE_MASK 0x7ff80000
+#define LED_CTRL_BLINK_RATE_SHIFT 19
+#define LED_CTRL_BLINK_PER_OVERRIDE 0x00080000
+#define LED_CTRL_BLINK_RATE_OVERRIDE 0x80000000
+#define MAC_ADDR_0_HIGH 0x00000410 /* upper 2 bytes */
+#define MAC_ADDR_0_LOW 0x00000414 /* lower 4 bytes */
+#define MAC_ADDR_1_HIGH 0x00000418 /* upper 2 bytes */
+#define MAC_ADDR_1_LOW 0x0000041c /* lower 4 bytes */
+#define MAC_ADDR_2_HIGH 0x00000420 /* upper 2 bytes */
+#define MAC_ADDR_2_LOW 0x00000424 /* lower 4 bytes */
+#define MAC_ADDR_3_HIGH 0x00000428 /* upper 2 bytes */
+#define MAC_ADDR_3_LOW 0x0000042c /* lower 4 bytes */
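+/* Illustrative sketch (not part of the original header): programming a
+ * station address into slot 0, assuming a tw32() register-write accessor
+ * (hypothetical name) and a 6-byte addr[]:
+ *
+ *   tw32(MAC_ADDR_0_HIGH, (addr[0] << 8) | addr[1]);
+ *   tw32(MAC_ADDR_0_LOW,  (addr[2] << 24) | (addr[3] << 16) |
+ *                         (addr[4] <<  8) |  addr[5]);
+ */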
+#define MAC_ACPI_MBUF_PTR 0x00000430
+#define MAC_ACPI_LEN_OFFSET 0x00000434
+#define ACPI_LENOFF_LEN_MASK 0x0000ffff
+#define ACPI_LENOFF_LEN_SHIFT 0
+#define ACPI_LENOFF_OFF_MASK 0x0fff0000
+#define ACPI_LENOFF_OFF_SHIFT 16
+#define MAC_TX_BACKOFF_SEED 0x00000438
+#define TX_BACKOFF_SEED_MASK 0x000003ff
+#define MAC_RX_MTU_SIZE 0x0000043c
+#define RX_MTU_SIZE_MASK 0x0000ffff
+#define MAC_PCS_TEST 0x00000440
+#define PCS_TEST_PATTERN_MASK 0x000fffff
+#define PCS_TEST_PATTERN_SHIFT 0
+#define PCS_TEST_ENABLE 0x00100000
+#define MAC_TX_AUTO_NEG 0x00000444
+#define TX_AUTO_NEG_MASK 0x0000ffff
+#define TX_AUTO_NEG_SHIFT 0
+#define MAC_RX_AUTO_NEG 0x00000448
+#define RX_AUTO_NEG_MASK 0x0000ffff
+#define RX_AUTO_NEG_SHIFT 0
+#define MAC_MI_COM 0x0000044c
+#define MI_COM_CMD_MASK 0x0c000000
+#define MI_COM_CMD_WRITE 0x04000000
+#define MI_COM_CMD_READ 0x08000000
+#define MI_COM_READ_FAILED 0x10000000
+#define MI_COM_START 0x20000000
+#define MI_COM_BUSY 0x20000000
+#define MI_COM_PHY_ADDR_MASK 0x03e00000
+#define MI_COM_PHY_ADDR_SHIFT 21
+#define MI_COM_REG_ADDR_MASK 0x001f0000
+#define MI_COM_REG_ADDR_SHIFT 16
+#define MI_COM_DATA_MASK 0x0000ffff
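+/* Illustrative sketch (not part of the original header) of an MII register
+ * read through MAC_MI_COM; tr32()/tw32() and phy_addr/reg are assumed
+ * accessor and variable names:
+ *
+ *   u32 frame = ((phy_addr << MI_COM_PHY_ADDR_SHIFT) & MI_COM_PHY_ADDR_MASK) |
+ *               ((reg << MI_COM_REG_ADDR_SHIFT) & MI_COM_REG_ADDR_MASK) |
+ *               MI_COM_CMD_READ | MI_COM_START;
+ *   tw32(MAC_MI_COM, frame);
+ *   do { frame = tr32(MAC_MI_COM); } while (frame & MI_COM_BUSY);
+ *   if (!(frame & MI_COM_READ_FAILED))
+ *           val = frame & MI_COM_DATA_MASK;
+ */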
+#define MAC_MI_STAT 0x00000450
+#define MAC_MI_STAT_LNKSTAT_ATTN_ENAB 0x00000001
+#define MAC_MI_MODE 0x00000454
+#define MAC_MI_MODE_CLK_10MHZ 0x00000001
+#define MAC_MI_MODE_SHORT_PREAMBLE 0x00000002
+#define MAC_MI_MODE_AUTO_POLL 0x00000010
+#define MAC_MI_MODE_CORE_CLK_62MHZ 0x00008000
+#define MAC_MI_MODE_BASE 0x000c0000 /* XXX magic values XXX */
+#define MAC_AUTO_POLL_STATUS 0x00000458
+#define MAC_AUTO_POLL_ERROR 0x00000001
+#define MAC_TX_MODE 0x0000045c
+#define TX_MODE_RESET 0x00000001
+#define TX_MODE_ENABLE 0x00000002
+#define TX_MODE_FLOW_CTRL_ENABLE 0x00000010
+#define TX_MODE_BIG_BCKOFF_ENABLE 0x00000020
+#define TX_MODE_LONG_PAUSE_ENABLE 0x00000040
+#define MAC_TX_STATUS 0x00000460
+#define TX_STATUS_XOFFED 0x00000001
+#define TX_STATUS_SENT_XOFF 0x00000002
+#define TX_STATUS_SENT_XON 0x00000004
+#define TX_STATUS_LINK_UP 0x00000008
+#define TX_STATUS_ODI_UNDERRUN 0x00000010
+#define TX_STATUS_ODI_OVERRUN 0x00000020
+#define MAC_TX_LENGTHS 0x00000464
+#define TX_LENGTHS_SLOT_TIME_MASK 0x000000ff
+#define TX_LENGTHS_SLOT_TIME_SHIFT 0
+#define TX_LENGTHS_IPG_MASK 0x00000f00
+#define TX_LENGTHS_IPG_SHIFT 8
+#define TX_LENGTHS_IPG_CRS_MASK 0x00003000
+#define TX_LENGTHS_IPG_CRS_SHIFT 12
+#define MAC_RX_MODE 0x00000468
+#define RX_MODE_RESET 0x00000001
+#define RX_MODE_ENABLE 0x00000002
+#define RX_MODE_FLOW_CTRL_ENABLE 0x00000004
+#define RX_MODE_KEEP_MAC_CTRL 0x00000008
+#define RX_MODE_KEEP_PAUSE 0x00000010
+#define RX_MODE_ACCEPT_OVERSIZED 0x00000020
+#define RX_MODE_ACCEPT_RUNTS 0x00000040
+#define RX_MODE_LEN_CHECK 0x00000080
+#define RX_MODE_PROMISC 0x00000100
+#define RX_MODE_NO_CRC_CHECK 0x00000200
+#define RX_MODE_KEEP_VLAN_TAG 0x00000400
+#define MAC_RX_STATUS 0x0000046c
+#define RX_STATUS_REMOTE_TX_XOFFED 0x00000001
+#define RX_STATUS_XOFF_RCVD 0x00000002
+#define RX_STATUS_XON_RCVD 0x00000004
+#define MAC_HASH_REG_0 0x00000470
+#define MAC_HASH_REG_1 0x00000474
+#define MAC_HASH_REG_2 0x00000478
+#define MAC_HASH_REG_3 0x0000047c
+#define MAC_RCV_RULE_0 0x00000480
+#define MAC_RCV_VALUE_0 0x00000484
+#define MAC_RCV_RULE_1 0x00000488
+#define MAC_RCV_VALUE_1 0x0000048c
+#define MAC_RCV_RULE_2 0x00000490
+#define MAC_RCV_VALUE_2 0x00000494
+#define MAC_RCV_RULE_3 0x00000498
+#define MAC_RCV_VALUE_3 0x0000049c
+#define MAC_RCV_RULE_4 0x000004a0
+#define MAC_RCV_VALUE_4 0x000004a4
+#define MAC_RCV_RULE_5 0x000004a8
+#define MAC_RCV_VALUE_5 0x000004ac
+#define MAC_RCV_RULE_6 0x000004b0
+#define MAC_RCV_VALUE_6 0x000004b4
+#define MAC_RCV_RULE_7 0x000004b8
+#define MAC_RCV_VALUE_7 0x000004bc
+#define MAC_RCV_RULE_8 0x000004c0
+#define MAC_RCV_VALUE_8 0x000004c4
+#define MAC_RCV_RULE_9 0x000004c8
+#define MAC_RCV_VALUE_9 0x000004cc
+#define MAC_RCV_RULE_10 0x000004d0
+#define MAC_RCV_VALUE_10 0x000004d4
+#define MAC_RCV_RULE_11 0x000004d8
+#define MAC_RCV_VALUE_11 0x000004dc
+#define MAC_RCV_RULE_12 0x000004e0
+#define MAC_RCV_VALUE_12 0x000004e4
+#define MAC_RCV_RULE_13 0x000004e8
+#define MAC_RCV_VALUE_13 0x000004ec
+#define MAC_RCV_RULE_14 0x000004f0
+#define MAC_RCV_VALUE_14 0x000004f4
+#define MAC_RCV_RULE_15 0x000004f8
+#define MAC_RCV_VALUE_15 0x000004fc
+#define RCV_RULE_DISABLE_MASK 0x7fffffff
+#define MAC_RCV_RULE_CFG 0x00000500
+#define RCV_RULE_CFG_DEFAULT_CLASS 0x00000008
+/* 0x504 --> 0x590 unused */
+#define MAC_SERDES_CFG 0x00000590
+#define MAC_SERDES_STAT 0x00000594
+/* 0x598 --> 0x600 unused */
+#define MAC_TX_MAC_STATE_BASE 0x00000600 /* 16 bytes */
+#define MAC_RX_MAC_STATE_BASE 0x00000610 /* 20 bytes */
+/* 0x624 --> 0x800 unused */
+#define MAC_RX_STATS_BASE 0x00000800 /* 26 32-bit words */
+/* 0x868 --> 0x880 unused */
+#define MAC_TX_STATS_BASE 0x00000880 /* 28 32-bit words */
+/* 0x8f0 --> 0xc00 unused */
+
+/* Send data initiator control registers */
+#define SNDDATAI_MODE 0x00000c00
+#define SNDDATAI_MODE_RESET 0x00000001
+#define SNDDATAI_MODE_ENABLE 0x00000002
+#define SNDDATAI_MODE_STAT_OFLOW_ENAB 0x00000004
+#define SNDDATAI_STATUS 0x00000c04
+#define SNDDATAI_STATUS_STAT_OFLOW 0x00000004
+#define SNDDATAI_STATSCTRL 0x00000c08
+#define SNDDATAI_SCTRL_ENABLE 0x00000001
+#define SNDDATAI_SCTRL_FASTUPD 0x00000002
+#define SNDDATAI_SCTRL_CLEAR 0x00000004
+#define SNDDATAI_SCTRL_FLUSH 0x00000008
+#define SNDDATAI_SCTRL_FORCE_ZERO 0x00000010
+#define SNDDATAI_STATSENAB 0x00000c0c
+#define SNDDATAI_STATSINCMASK 0x00000c10
+/* 0xc14 --> 0xc80 unused */
+#define SNDDATAI_COS_CNT_0 0x00000c80
+#define SNDDATAI_COS_CNT_1 0x00000c84
+#define SNDDATAI_COS_CNT_2 0x00000c88
+#define SNDDATAI_COS_CNT_3 0x00000c8c
+#define SNDDATAI_COS_CNT_4 0x00000c90
+#define SNDDATAI_COS_CNT_5 0x00000c94
+#define SNDDATAI_COS_CNT_6 0x00000c98
+#define SNDDATAI_COS_CNT_7 0x00000c9c
+#define SNDDATAI_COS_CNT_8 0x00000ca0
+#define SNDDATAI_COS_CNT_9 0x00000ca4
+#define SNDDATAI_COS_CNT_10 0x00000ca8
+#define SNDDATAI_COS_CNT_11 0x00000cac
+#define SNDDATAI_COS_CNT_12 0x00000cb0
+#define SNDDATAI_COS_CNT_13 0x00000cb4
+#define SNDDATAI_COS_CNT_14 0x00000cb8
+#define SNDDATAI_COS_CNT_15 0x00000cbc
+#define SNDDATAI_DMA_RDQ_FULL_CNT 0x00000cc0
+#define SNDDATAI_DMA_PRIO_RDQ_FULL_CNT 0x00000cc4
+#define SNDDATAI_SDCQ_FULL_CNT 0x00000cc8
+#define SNDDATAI_NICRNG_SSND_PIDX_CNT 0x00000ccc
+#define SNDDATAI_STATS_UPDATED_CNT 0x00000cd0
+#define SNDDATAI_INTERRUPTS_CNT 0x00000cd4
+#define SNDDATAI_AVOID_INTERRUPTS_CNT 0x00000cd8
+#define SNDDATAI_SND_THRESH_HIT_CNT 0x00000cdc
+/* 0xce0 --> 0x1000 unused */
+
+/* Send data completion control registers */
+#define SNDDATAC_MODE 0x00001000
+#define SNDDATAC_MODE_RESET 0x00000001
+#define SNDDATAC_MODE_ENABLE 0x00000002
+/* 0x1004 --> 0x1400 unused */
+
+/* Send BD ring selector */
+#define SNDBDS_MODE 0x00001400
+#define SNDBDS_MODE_RESET 0x00000001
+#define SNDBDS_MODE_ENABLE 0x00000002
+#define SNDBDS_MODE_ATTN_ENABLE 0x00000004
+#define SNDBDS_STATUS 0x00001404
+#define SNDBDS_STATUS_ERROR_ATTN 0x00000004
+#define SNDBDS_HWDIAG 0x00001408
+/* 0x140c --> 0x1440 unused */
+#define SNDBDS_SEL_CON_IDX_0 0x00001440
+#define SNDBDS_SEL_CON_IDX_1 0x00001444
+#define SNDBDS_SEL_CON_IDX_2 0x00001448
+#define SNDBDS_SEL_CON_IDX_3 0x0000144c
+#define SNDBDS_SEL_CON_IDX_4 0x00001450
+#define SNDBDS_SEL_CON_IDX_5 0x00001454
+#define SNDBDS_SEL_CON_IDX_6 0x00001458
+#define SNDBDS_SEL_CON_IDX_7 0x0000145c
+#define SNDBDS_SEL_CON_IDX_8 0x00001460
+#define SNDBDS_SEL_CON_IDX_9 0x00001464
+#define SNDBDS_SEL_CON_IDX_10 0x00001468
+#define SNDBDS_SEL_CON_IDX_11 0x0000146c
+#define SNDBDS_SEL_CON_IDX_12 0x00001470
+#define SNDBDS_SEL_CON_IDX_13 0x00001474
+#define SNDBDS_SEL_CON_IDX_14 0x00001478
+#define SNDBDS_SEL_CON_IDX_15 0x0000147c
+/* 0x1480 --> 0x1800 unused */
+
+/* Send BD initiator control registers */
+#define SNDBDI_MODE 0x00001800
+#define SNDBDI_MODE_RESET 0x00000001
+#define SNDBDI_MODE_ENABLE 0x00000002
+#define SNDBDI_MODE_ATTN_ENABLE 0x00000004
+#define SNDBDI_STATUS 0x00001804
+#define SNDBDI_STATUS_ERROR_ATTN 0x00000004
+#define SNDBDI_IN_PROD_IDX_0 0x00001808
+#define SNDBDI_IN_PROD_IDX_1 0x0000180c
+#define SNDBDI_IN_PROD_IDX_2 0x00001810
+#define SNDBDI_IN_PROD_IDX_3 0x00001814
+#define SNDBDI_IN_PROD_IDX_4 0x00001818
+#define SNDBDI_IN_PROD_IDX_5 0x0000181c
+#define SNDBDI_IN_PROD_IDX_6 0x00001820
+#define SNDBDI_IN_PROD_IDX_7 0x00001824
+#define SNDBDI_IN_PROD_IDX_8 0x00001828
+#define SNDBDI_IN_PROD_IDX_9 0x0000182c
+#define SNDBDI_IN_PROD_IDX_10 0x00001830
+#define SNDBDI_IN_PROD_IDX_11 0x00001834
+#define SNDBDI_IN_PROD_IDX_12 0x00001838
+#define SNDBDI_IN_PROD_IDX_13 0x0000183c
+#define SNDBDI_IN_PROD_IDX_14 0x00001840
+#define SNDBDI_IN_PROD_IDX_15 0x00001844
+/* 0x1848 --> 0x1c00 unused */
+
+/* Send BD completion control registers */
+#define SNDBDC_MODE 0x00001c00
+#define SNDBDC_MODE_RESET 0x00000001
+#define SNDBDC_MODE_ENABLE 0x00000002
+#define SNDBDC_MODE_ATTN_ENABLE 0x00000004
+/* 0x1c04 --> 0x2000 unused */
+
+/* Receive list placement control registers */
+#define RCVLPC_MODE 0x00002000
+#define RCVLPC_MODE_RESET 0x00000001
+#define RCVLPC_MODE_ENABLE 0x00000002
+#define RCVLPC_MODE_CLASS0_ATTN_ENAB 0x00000004
+#define RCVLPC_MODE_MAPOOR_AATTN_ENAB 0x00000008
+#define RCVLPC_MODE_STAT_OFLOW_ENAB 0x00000010
+#define RCVLPC_STATUS 0x00002004
+#define RCVLPC_STATUS_CLASS0 0x00000004
+#define RCVLPC_STATUS_MAPOOR 0x00000008
+#define RCVLPC_STATUS_STAT_OFLOW 0x00000010
+#define RCVLPC_LOCK 0x00002008
+#define RCVLPC_LOCK_REQ_MASK 0x0000ffff
+#define RCVLPC_LOCK_REQ_SHIFT 0
+#define RCVLPC_LOCK_GRANT_MASK 0xffff0000
+#define RCVLPC_LOCK_GRANT_SHIFT 16
+#define RCVLPC_NON_EMPTY_BITS 0x0000200c
+#define RCVLPC_NON_EMPTY_BITS_MASK 0x0000ffff
+#define RCVLPC_CONFIG 0x00002010
+#define RCVLPC_STATSCTRL 0x00002014
+#define RCVLPC_STATSCTRL_ENABLE 0x00000001
+#define RCVLPC_STATSCTRL_FASTUPD 0x00000002
+#define RCVLPC_STATS_ENABLE 0x00002018
+#define RCVLPC_STATS_INCMASK 0x0000201c
+/* 0x2020 --> 0x2100 unused */
+#define RCVLPC_SELLST_BASE 0x00002100 /* 16 16-byte entries */
+#define SELLST_TAIL 0x00000004
+#define SELLST_CONT 0x00000008
+#define SELLST_UNUSED 0x0000000c
+#define RCVLPC_COS_CNTL_BASE 0x00002200 /* 16 4-byte entries */
+#define RCVLPC_DROP_FILTER_CNT 0x00002240
+#define RCVLPC_DMA_WQ_FULL_CNT 0x00002244
+#define RCVLPC_DMA_HIPRIO_WQ_FULL_CNT 0x00002248
+#define RCVLPC_NO_RCV_BD_CNT 0x0000224c
+#define RCVLPC_IN_DISCARDS_CNT 0x00002250
+#define RCVLPC_IN_ERRORS_CNT 0x00002254
+#define RCVLPC_RCV_THRESH_HIT_CNT 0x00002258
+/* 0x225c --> 0x2400 unused */
+
+/* Receive Data and Receive BD Initiator Control */
+#define RCVDBDI_MODE 0x00002400
+#define RCVDBDI_MODE_RESET 0x00000001
+#define RCVDBDI_MODE_ENABLE 0x00000002
+#define RCVDBDI_MODE_JUMBOBD_NEEDED 0x00000004
+#define RCVDBDI_MODE_FRM_TOO_BIG 0x00000008
+#define RCVDBDI_MODE_INV_RING_SZ 0x00000010
+#define RCVDBDI_STATUS 0x00002404
+#define RCVDBDI_STATUS_JUMBOBD_NEEDED 0x00000004
+#define RCVDBDI_STATUS_FRM_TOO_BIG 0x00000008
+#define RCVDBDI_STATUS_INV_RING_SZ 0x00000010
+#define RCVDBDI_SPLIT_FRAME_MINSZ 0x00002408
+/* 0x240c --> 0x2440 unused */
+#define RCVDBDI_JUMBO_BD 0x00002440 /* TG3_BDINFO_... */
+#define RCVDBDI_STD_BD 0x00002450 /* TG3_BDINFO_... */
+#define RCVDBDI_MINI_BD 0x00002460 /* TG3_BDINFO_... */
+#define RCVDBDI_JUMBO_CON_IDX 0x00002470
+#define RCVDBDI_STD_CON_IDX 0x00002474
+#define RCVDBDI_MINI_CON_IDX 0x00002478
+/* 0x247c --> 0x2480 unused */
+#define RCVDBDI_BD_PROD_IDX_0 0x00002480
+#define RCVDBDI_BD_PROD_IDX_1 0x00002484
+#define RCVDBDI_BD_PROD_IDX_2 0x00002488
+#define RCVDBDI_BD_PROD_IDX_3 0x0000248c
+#define RCVDBDI_BD_PROD_IDX_4 0x00002490
+#define RCVDBDI_BD_PROD_IDX_5 0x00002494
+#define RCVDBDI_BD_PROD_IDX_6 0x00002498
+#define RCVDBDI_BD_PROD_IDX_7 0x0000249c
+#define RCVDBDI_BD_PROD_IDX_8 0x000024a0
+#define RCVDBDI_BD_PROD_IDX_9 0x000024a4
+#define RCVDBDI_BD_PROD_IDX_10 0x000024a8
+#define RCVDBDI_BD_PROD_IDX_11 0x000024ac
+#define RCVDBDI_BD_PROD_IDX_12 0x000024b0
+#define RCVDBDI_BD_PROD_IDX_13 0x000024b4
+#define RCVDBDI_BD_PROD_IDX_14 0x000024b8
+#define RCVDBDI_BD_PROD_IDX_15 0x000024bc
+#define RCVDBDI_HWDIAG 0x000024c0
+/* 0x24c4 --> 0x2800 unused */
+
+/* Receive Data Completion Control */
+#define RCVDCC_MODE 0x00002800
+#define RCVDCC_MODE_RESET 0x00000001
+#define RCVDCC_MODE_ENABLE 0x00000002
+#define RCVDCC_MODE_ATTN_ENABLE 0x00000004
+/* 0x2804 --> 0x2c00 unused */
+
+/* Receive BD Initiator Control Registers */
+#define RCVBDI_MODE 0x00002c00
+#define RCVBDI_MODE_RESET 0x00000001
+#define RCVBDI_MODE_ENABLE 0x00000002
+#define RCVBDI_MODE_RCB_ATTN_ENAB 0x00000004
+#define RCVBDI_STATUS 0x00002c04
+#define RCVBDI_STATUS_RCB_ATTN 0x00000004
+#define RCVBDI_JUMBO_PROD_IDX 0x00002c08
+#define RCVBDI_STD_PROD_IDX 0x00002c0c
+#define RCVBDI_MINI_PROD_IDX 0x00002c10
+#define RCVBDI_MINI_THRESH 0x00002c14
+#define RCVBDI_STD_THRESH 0x00002c18
+#define RCVBDI_JUMBO_THRESH 0x00002c1c
+/* 0x2c20 --> 0x3000 unused */
+
+/* Receive BD Completion Control Registers */
+#define RCVCC_MODE 0x00003000
+#define RCVCC_MODE_RESET 0x00000001
+#define RCVCC_MODE_ENABLE 0x00000002
+#define RCVCC_MODE_ATTN_ENABLE 0x00000004
+#define RCVCC_STATUS 0x00003004
+#define RCVCC_STATUS_ERROR_ATTN 0x00000004
+#define RCVCC_JUMP_PROD_IDX 0x00003008
+#define RCVCC_STD_PROD_IDX 0x0000300c
+#define RCVCC_MINI_PROD_IDX 0x00003010
+/* 0x3014 --> 0x3400 unused */
+
+/* Receive list selector control registers */
+#define RCVLSC_MODE 0x00003400
+#define RCVLSC_MODE_RESET 0x00000001
+#define RCVLSC_MODE_ENABLE 0x00000002
+#define RCVLSC_MODE_ATTN_ENABLE 0x00000004
+#define RCVLSC_STATUS 0x00003404
+#define RCVLSC_STATUS_ERROR_ATTN 0x00000004
+/* 0x3408 --> 0x3800 unused */
+
+/* Mbuf cluster free registers */
+#define MBFREE_MODE 0x00003800
+#define MBFREE_MODE_RESET 0x00000001
+#define MBFREE_MODE_ENABLE 0x00000002
+#define MBFREE_STATUS 0x00003804
+/* 0x3808 --> 0x3c00 unused */
+
+/* Host coalescing control registers */
+#define HOSTCC_MODE 0x00003c00
+#define HOSTCC_MODE_RESET 0x00000001
+#define HOSTCC_MODE_ENABLE 0x00000002
+#define HOSTCC_MODE_ATTN 0x00000004
+#define HOSTCC_MODE_NOW 0x00000008
+#define HOSTCC_MODE_FULL_STATUS 0x00000000
+#define HOSTCC_MODE_64BYTE 0x00000080
+#define HOSTCC_MODE_32BYTE 0x00000100
+#define HOSTCC_MODE_CLRTICK_RXBD 0x00000200
+#define HOSTCC_MODE_CLRTICK_TXBD 0x00000400
+#define HOSTCC_MODE_NOINT_ON_NOW 0x00000800
+#define HOSTCC_MODE_NOINT_ON_FORCE 0x00001000
+#define HOSTCC_STATUS 0x00003c04
+#define HOSTCC_STATUS_ERROR_ATTN 0x00000004
+#define HOSTCC_RXCOL_TICKS 0x00003c08
+#define LOW_RXCOL_TICKS 0x00000032
+#define DEFAULT_RXCOL_TICKS 0x00000048
+#define HIGH_RXCOL_TICKS 0x00000096
+#define HOSTCC_TXCOL_TICKS 0x00003c0c
+#define LOW_TXCOL_TICKS 0x00000096
+#define DEFAULT_TXCOL_TICKS 0x0000012c
+#define HIGH_TXCOL_TICKS 0x00000145
+#define HOSTCC_RXMAX_FRAMES 0x00003c10
+#define LOW_RXMAX_FRAMES 0x00000005
+#define DEFAULT_RXMAX_FRAMES 0x00000008
+#define HIGH_RXMAX_FRAMES 0x00000012
+#define HOSTCC_TXMAX_FRAMES 0x00003c14
+#define LOW_TXMAX_FRAMES 0x00000035
+#define DEFAULT_TXMAX_FRAMES 0x0000004b
+#define HIGH_TXMAX_FRAMES 0x00000052
+#define HOSTCC_RXCOAL_TICK_INT 0x00003c18
+#define DEFAULT_RXCOAL_TICK_INT 0x00000019
+#define HOSTCC_TXCOAL_TICK_INT 0x00003c1c
+#define DEFAULT_TXCOAL_TICK_INT 0x00000019
+#define HOSTCC_RXCOAL_MAXF_INT 0x00003c20
+#define DEFAULT_RXCOAL_MAXF_INT 0x00000005
+#define HOSTCC_TXCOAL_MAXF_INT 0x00003c24
+#define DEFAULT_TXCOAL_MAXF_INT 0x00000005
+#define HOSTCC_STAT_COAL_TICKS 0x00003c28
+#define DEFAULT_STAT_COAL_TICKS 0x000f4240
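+/* Illustrative note (not from the original header): DEFAULT_STAT_COAL_TICKS
+ * is 1,000,000 decimal; assuming the coalescing tick is one microsecond,
+ * the statistics block is DMA'd to the host roughly once per second by
+ * default. */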
+/* 0x3c2c --> 0x3c30 unused */
+#define HOSTCC_STATS_BLK_HOST_ADDR 0x00003c30 /* 64-bit */
+#define HOSTCC_STATUS_BLK_HOST_ADDR 0x00003c38 /* 64-bit */
+#define HOSTCC_STATS_BLK_NIC_ADDR 0x00003c40
+#define HOSTCC_STATUS_BLK_NIC_ADDR 0x00003c44
+#define HOSTCC_FLOW_ATTN 0x00003c48
+/* 0x3c4c --> 0x3c50 unused */
+#define HOSTCC_JUMBO_CON_IDX 0x00003c50
+#define HOSTCC_STD_CON_IDX 0x00003c54
+#define HOSTCC_MINI_CON_IDX 0x00003c58
+/* 0x3c5c --> 0x3c80 unused */
+#define HOSTCC_RET_PROD_IDX_0 0x00003c80
+#define HOSTCC_RET_PROD_IDX_1 0x00003c84
+#define HOSTCC_RET_PROD_IDX_2 0x00003c88
+#define HOSTCC_RET_PROD_IDX_3 0x00003c8c
+#define HOSTCC_RET_PROD_IDX_4 0x00003c90
+#define HOSTCC_RET_PROD_IDX_5 0x00003c94
+#define HOSTCC_RET_PROD_IDX_6 0x00003c98
+#define HOSTCC_RET_PROD_IDX_7 0x00003c9c
+#define HOSTCC_RET_PROD_IDX_8 0x00003ca0
+#define HOSTCC_RET_PROD_IDX_9 0x00003ca4
+#define HOSTCC_RET_PROD_IDX_10 0x00003ca8
+#define HOSTCC_RET_PROD_IDX_11 0x00003cac
+#define HOSTCC_RET_PROD_IDX_12 0x00003cb0
+#define HOSTCC_RET_PROD_IDX_13 0x00003cb4
+#define HOSTCC_RET_PROD_IDX_14 0x00003cb8
+#define HOSTCC_RET_PROD_IDX_15 0x00003cbc
+#define HOSTCC_SND_CON_IDX_0 0x00003cc0
+#define HOSTCC_SND_CON_IDX_1 0x00003cc4
+#define HOSTCC_SND_CON_IDX_2 0x00003cc8
+#define HOSTCC_SND_CON_IDX_3 0x00003ccc
+#define HOSTCC_SND_CON_IDX_4 0x00003cd0
+#define HOSTCC_SND_CON_IDX_5 0x00003cd4
+#define HOSTCC_SND_CON_IDX_6 0x00003cd8
+#define HOSTCC_SND_CON_IDX_7 0x00003cdc
+#define HOSTCC_SND_CON_IDX_8 0x00003ce0
+#define HOSTCC_SND_CON_IDX_9 0x00003ce4
+#define HOSTCC_SND_CON_IDX_10 0x00003ce8
+#define HOSTCC_SND_CON_IDX_11 0x00003cec
+#define HOSTCC_SND_CON_IDX_12 0x00003cf0
+#define HOSTCC_SND_CON_IDX_13 0x00003cf4
+#define HOSTCC_SND_CON_IDX_14 0x00003cf8
+#define HOSTCC_SND_CON_IDX_15 0x00003cfc
+/* 0x3d00 --> 0x4000 unused */
+
+/* Memory arbiter control registers */
+#define MEMARB_MODE 0x00004000
+#define MEMARB_MODE_RESET 0x00000001
+#define MEMARB_MODE_ENABLE 0x00000002
+#define MEMARB_STATUS 0x00004004
+#define MEMARB_TRAP_ADDR_LOW 0x00004008
+#define MEMARB_TRAP_ADDR_HIGH 0x0000400c
+/* 0x4010 --> 0x4400 unused */
+
+/* Buffer manager control registers */
+#define BUFMGR_MODE 0x00004400
+#define BUFMGR_MODE_RESET 0x00000001
+#define BUFMGR_MODE_ENABLE 0x00000002
+#define BUFMGR_MODE_ATTN_ENABLE 0x00000004
+#define BUFMGR_MODE_BM_TEST 0x00000008
+#define BUFMGR_MODE_MBLOW_ATTN_ENAB 0x00000010
+#define BUFMGR_STATUS 0x00004404
+#define BUFMGR_STATUS_ERROR 0x00000004
+#define BUFMGR_STATUS_MBLOW 0x00000010
+#define BUFMGR_MB_POOL_ADDR 0x00004408
+#define BUFMGR_MB_POOL_SIZE 0x0000440c
+#define BUFMGR_MB_RDMA_LOW_WATER 0x00004410
+#define DEFAULT_MB_RDMA_LOW_WATER 0x00000040
+#define DEFAULT_MB_RDMA_LOW_WATER_JUMBO 0x00000130
+#define BUFMGR_MB_MACRX_LOW_WATER 0x00004414
+#define DEFAULT_MB_MACRX_LOW_WATER 0x00000020
+#define DEFAULT_MB_MACRX_LOW_WATER_JUMBO 0x00000098
+#define BUFMGR_MB_HIGH_WATER 0x00004418
+#define DEFAULT_MB_HIGH_WATER 0x00000060
+#define DEFAULT_MB_HIGH_WATER_JUMBO 0x0000017c
+#define BUFMGR_RX_MB_ALLOC_REQ 0x0000441c
+#define BUFMGR_MB_ALLOC_BIT 0x10000000
+#define BUFMGR_RX_MB_ALLOC_RESP 0x00004420
+#define BUFMGR_TX_MB_ALLOC_REQ 0x00004424
+#define BUFMGR_TX_MB_ALLOC_RESP 0x00004428
+#define BUFMGR_DMA_DESC_POOL_ADDR 0x0000442c
+#define BUFMGR_DMA_DESC_POOL_SIZE 0x00004430
+#define BUFMGR_DMA_LOW_WATER 0x00004434
+#define DEFAULT_DMA_LOW_WATER 0x00000005
+#define BUFMGR_DMA_HIGH_WATER 0x00004438
+#define DEFAULT_DMA_HIGH_WATER 0x0000000a
+#define BUFMGR_RX_DMA_ALLOC_REQ 0x0000443c
+#define BUFMGR_RX_DMA_ALLOC_RESP 0x00004440
+#define BUFMGR_TX_DMA_ALLOC_REQ 0x00004444
+#define BUFMGR_TX_DMA_ALLOC_RESP 0x00004448
+#define BUFMGR_HWDIAG_0 0x0000444c
+#define BUFMGR_HWDIAG_1 0x00004450
+#define BUFMGR_HWDIAG_2 0x00004454
+/* 0x4458 --> 0x4800 unused */
+
+/* Read DMA control registers */
+#define RDMAC_MODE 0x00004800
+#define RDMAC_MODE_RESET 0x00000001
+#define RDMAC_MODE_ENABLE 0x00000002
+#define RDMAC_MODE_TGTABORT_ENAB 0x00000004
+#define RDMAC_MODE_MSTABORT_ENAB 0x00000008
+#define RDMAC_MODE_PARITYERR_ENAB 0x00000010
+#define RDMAC_MODE_ADDROFLOW_ENAB 0x00000020
+#define RDMAC_MODE_FIFOOFLOW_ENAB 0x00000040
+#define RDMAC_MODE_FIFOURUN_ENAB 0x00000080
+#define RDMAC_MODE_FIFOOREAD_ENAB 0x00000100
+#define RDMAC_MODE_LNGREAD_ENAB 0x00000200
+#define RDMAC_MODE_SPLIT_ENABLE 0x00000800
+#define RDMAC_MODE_SPLIT_RESET 0x00001000
+#define RDMAC_STATUS 0x00004804
+#define RDMAC_STATUS_TGTABORT 0x00000004
+#define RDMAC_STATUS_MSTABORT 0x00000008
+#define RDMAC_STATUS_PARITYERR 0x00000010
+#define RDMAC_STATUS_ADDROFLOW 0x00000020
+#define RDMAC_STATUS_FIFOOFLOW 0x00000040
+#define RDMAC_STATUS_FIFOURUN 0x00000080
+#define RDMAC_STATUS_FIFOOREAD 0x00000100
+#define RDMAC_STATUS_LNGREAD 0x00000200
+/* 0x4808 --> 0x4c00 unused */
+
+/* Write DMA control registers */
+#define WDMAC_MODE 0x00004c00
+#define WDMAC_MODE_RESET 0x00000001
+#define WDMAC_MODE_ENABLE 0x00000002
+#define WDMAC_MODE_TGTABORT_ENAB 0x00000004
+#define WDMAC_MODE_MSTABORT_ENAB 0x00000008
+#define WDMAC_MODE_PARITYERR_ENAB 0x00000010
+#define WDMAC_MODE_ADDROFLOW_ENAB 0x00000020
+#define WDMAC_MODE_FIFOOFLOW_ENAB 0x00000040
+#define WDMAC_MODE_FIFOURUN_ENAB 0x00000080
+#define WDMAC_MODE_FIFOOREAD_ENAB 0x00000100
+#define WDMAC_MODE_LNGREAD_ENAB 0x00000200
+#define WDMAC_STATUS 0x00004c04
+#define WDMAC_STATUS_TGTABORT 0x00000004
+#define WDMAC_STATUS_MSTABORT 0x00000008
+#define WDMAC_STATUS_PARITYERR 0x00000010
+#define WDMAC_STATUS_ADDROFLOW 0x00000020
+#define WDMAC_STATUS_FIFOOFLOW 0x00000040
+#define WDMAC_STATUS_FIFOURUN 0x00000080
+#define WDMAC_STATUS_FIFOOREAD 0x00000100
+#define WDMAC_STATUS_LNGREAD 0x00000200
+/* 0x4c08 --> 0x5000 unused */
+
+/* Per-cpu register offsets (arm9) */
+#define CPU_MODE 0x00000000
+#define CPU_MODE_RESET 0x00000001
+#define CPU_MODE_HALT 0x00000400
+#define CPU_STATE 0x00000004
+#define CPU_EVTMASK 0x00000008
+/* 0xc --> 0x1c reserved */
+#define CPU_PC 0x0000001c
+#define CPU_INSN 0x00000020
+#define CPU_SPAD_UFLOW 0x00000024
+#define CPU_WDOG_CLEAR 0x00000028
+#define CPU_WDOG_VECTOR 0x0000002c
+#define CPU_WDOG_PC 0x00000030
+#define CPU_HW_BP 0x00000034
+/* 0x38 --> 0x44 unused */
+#define CPU_WDOG_SAVED_STATE 0x00000044
+#define CPU_LAST_BRANCH_ADDR 0x00000048
+#define CPU_SPAD_UFLOW_SET 0x0000004c
+/* 0x50 --> 0x200 unused */
+#define CPU_R0 0x00000200
+#define CPU_R1 0x00000204
+#define CPU_R2 0x00000208
+#define CPU_R3 0x0000020c
+#define CPU_R4 0x00000210
+#define CPU_R5 0x00000214
+#define CPU_R6 0x00000218
+#define CPU_R7 0x0000021c
+#define CPU_R8 0x00000220
+#define CPU_R9 0x00000224
+#define CPU_R10 0x00000228
+#define CPU_R11 0x0000022c
+#define CPU_R12 0x00000230
+#define CPU_R13 0x00000234
+#define CPU_R14 0x00000238
+#define CPU_R15 0x0000023c
+#define CPU_R16 0x00000240
+#define CPU_R17 0x00000244
+#define CPU_R18 0x00000248
+#define CPU_R19 0x0000024c
+#define CPU_R20 0x00000250
+#define CPU_R21 0x00000254
+#define CPU_R22 0x00000258
+#define CPU_R23 0x0000025c
+#define CPU_R24 0x00000260
+#define CPU_R25 0x00000264
+#define CPU_R26 0x00000268
+#define CPU_R27 0x0000026c
+#define CPU_R28 0x00000270
+#define CPU_R29 0x00000274
+#define CPU_R30 0x00000278
+#define CPU_R31 0x0000027c
+/* 0x280 --> 0x400 unused */
+
+#define RX_CPU_BASE 0x00005000
+#define TX_CPU_BASE 0x00005400
+
+/* Mailboxes */
+#define GRCMBOX_INTERRUPT_0 0x00005800 /* 64-bit */
+#define GRCMBOX_INTERRUPT_1 0x00005808 /* 64-bit */
+#define GRCMBOX_INTERRUPT_2 0x00005810 /* 64-bit */
+#define GRCMBOX_INTERRUPT_3 0x00005818 /* 64-bit */
+#define GRCMBOX_GENERAL_0 0x00005820 /* 64-bit */
+#define GRCMBOX_GENERAL_1 0x00005828 /* 64-bit */
+#define GRCMBOX_GENERAL_2 0x00005830 /* 64-bit */
+#define GRCMBOX_GENERAL_3 0x00005838 /* 64-bit */
+#define GRCMBOX_GENERAL_4 0x00005840 /* 64-bit */
+#define GRCMBOX_GENERAL_5 0x00005848 /* 64-bit */
+#define GRCMBOX_GENERAL_6 0x00005850 /* 64-bit */
+#define GRCMBOX_GENERAL_7 0x00005858 /* 64-bit */
+#define GRCMBOX_RELOAD_STAT 0x00005860 /* 64-bit */
+#define GRCMBOX_RCVSTD_PROD_IDX 0x00005868 /* 64-bit */
+#define GRCMBOX_RCVJUMBO_PROD_IDX 0x00005870 /* 64-bit */
+#define GRCMBOX_RCVMINI_PROD_IDX 0x00005878 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_0 0x00005880 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_1 0x00005888 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_2 0x00005890 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_3 0x00005898 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_4 0x000058a0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_5 0x000058a8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_6 0x000058b0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_7 0x000058b8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_8 0x000058c0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_9 0x000058c8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_10 0x000058d0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_11 0x000058d8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_12 0x000058e0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_13 0x000058e8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_14 0x000058f0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_15 0x000058f8 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_0 0x00005900 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_1 0x00005908 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_2 0x00005910 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_3 0x00005918 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_4 0x00005920 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_5 0x00005928 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_6 0x00005930 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_7 0x00005938 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_8 0x00005940 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_9 0x00005948 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_10 0x00005950 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_11 0x00005958 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_12 0x00005960 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_13 0x00005968 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_14 0x00005970 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_15 0x00005978 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_0 0x00005980 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_1 0x00005988 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_2 0x00005990 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_3 0x00005998 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_4 0x000059a0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_5 0x000059a8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_6 0x000059b0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_7 0x000059b8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_8 0x000059c0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_9 0x000059c8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_10 0x000059d0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_11 0x000059d8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_12 0x000059e0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_13 0x000059e8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_14 0x000059f0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_15 0x000059f8 /* 64-bit */
+#define GRCMBOX_HIGH_PRIO_EV_VECTOR 0x00005a00
+#define GRCMBOX_HIGH_PRIO_EV_MASK 0x00005a04
+#define GRCMBOX_LOW_PRIO_EV_VEC 0x00005a08
+#define GRCMBOX_LOW_PRIO_EV_MASK 0x00005a0c
+/* 0x5a10 --> 0x5c00 unused */
+
+/* Flow Through queues */
+#define FTQ_RESET 0x00005c00
+/* 0x5c04 --> 0x5c10 unused */
+#define FTQ_DMA_NORM_READ_CTL 0x00005c10
+#define FTQ_DMA_NORM_READ_FULL_CNT 0x00005c14
+#define FTQ_DMA_NORM_READ_FIFO_ENQDEQ 0x00005c18
+#define FTQ_DMA_NORM_READ_WRITE_PEEK 0x00005c1c
+#define FTQ_DMA_HIGH_READ_CTL 0x00005c20
+#define FTQ_DMA_HIGH_READ_FULL_CNT 0x00005c24
+#define FTQ_DMA_HIGH_READ_FIFO_ENQDEQ 0x00005c28
+#define FTQ_DMA_HIGH_READ_WRITE_PEEK 0x00005c2c
+#define FTQ_DMA_COMP_DISC_CTL 0x00005c30
+#define FTQ_DMA_COMP_DISC_FULL_CNT 0x00005c34
+#define FTQ_DMA_COMP_DISC_FIFO_ENQDEQ 0x00005c38
+#define FTQ_DMA_COMP_DISC_WRITE_PEEK 0x00005c3c
+#define FTQ_SEND_BD_COMP_CTL 0x00005c40
+#define FTQ_SEND_BD_COMP_FULL_CNT 0x00005c44
+#define FTQ_SEND_BD_COMP_FIFO_ENQDEQ 0x00005c48
+#define FTQ_SEND_BD_COMP_WRITE_PEEK 0x00005c4c
+#define FTQ_SEND_DATA_INIT_CTL 0x00005c50
+#define FTQ_SEND_DATA_INIT_FULL_CNT 0x00005c54
+#define FTQ_SEND_DATA_INIT_FIFO_ENQDEQ 0x00005c58
+#define FTQ_SEND_DATA_INIT_WRITE_PEEK 0x00005c5c
+#define FTQ_DMA_NORM_WRITE_CTL 0x00005c60
+#define FTQ_DMA_NORM_WRITE_FULL_CNT 0x00005c64
+#define FTQ_DMA_NORM_WRITE_FIFO_ENQDEQ 0x00005c68
+#define FTQ_DMA_NORM_WRITE_WRITE_PEEK 0x00005c6c
+#define FTQ_DMA_HIGH_WRITE_CTL 0x00005c70
+#define FTQ_DMA_HIGH_WRITE_FULL_CNT 0x00005c74
+#define FTQ_DMA_HIGH_WRITE_FIFO_ENQDEQ 0x00005c78
+#define FTQ_DMA_HIGH_WRITE_WRITE_PEEK 0x00005c7c
+#define FTQ_SWTYPE1_CTL 0x00005c80
+#define FTQ_SWTYPE1_FULL_CNT 0x00005c84
+#define FTQ_SWTYPE1_FIFO_ENQDEQ 0x00005c88
+#define FTQ_SWTYPE1_WRITE_PEEK 0x00005c8c
+#define FTQ_SEND_DATA_COMP_CTL 0x00005c90
+#define FTQ_SEND_DATA_COMP_FULL_CNT 0x00005c94
+#define FTQ_SEND_DATA_COMP_FIFO_ENQDEQ 0x00005c98
+#define FTQ_SEND_DATA_COMP_WRITE_PEEK 0x00005c9c
+#define FTQ_HOST_COAL_CTL 0x00005ca0
+#define FTQ_HOST_COAL_FULL_CNT 0x00005ca4
+#define FTQ_HOST_COAL_FIFO_ENQDEQ 0x00005ca8
+#define FTQ_HOST_COAL_WRITE_PEEK 0x00005cac
+#define FTQ_MAC_TX_CTL 0x00005cb0
+#define FTQ_MAC_TX_FULL_CNT 0x00005cb4
+#define FTQ_MAC_TX_FIFO_ENQDEQ 0x00005cb8
+#define FTQ_MAC_TX_WRITE_PEEK 0x00005cbc
+#define FTQ_MB_FREE_CTL 0x00005cc0
+#define FTQ_MB_FREE_FULL_CNT 0x00005cc4
+#define FTQ_MB_FREE_FIFO_ENQDEQ 0x00005cc8
+#define FTQ_MB_FREE_WRITE_PEEK 0x00005ccc
+#define FTQ_RCVBD_COMP_CTL 0x00005cd0
+#define FTQ_RCVBD_COMP_FULL_CNT 0x00005cd4
+#define FTQ_RCVBD_COMP_FIFO_ENQDEQ 0x00005cd8
+#define FTQ_RCVBD_COMP_WRITE_PEEK 0x00005cdc
+#define FTQ_RCVLST_PLMT_CTL 0x00005ce0
+#define FTQ_RCVLST_PLMT_FULL_CNT 0x00005ce4
+#define FTQ_RCVLST_PLMT_FIFO_ENQDEQ 0x00005ce8
+#define FTQ_RCVLST_PLMT_WRITE_PEEK 0x00005cec
+#define FTQ_RCVDATA_INI_CTL 0x00005cf0
+#define FTQ_RCVDATA_INI_FULL_CNT 0x00005cf4
+#define FTQ_RCVDATA_INI_FIFO_ENQDEQ 0x00005cf8
+#define FTQ_RCVDATA_INI_WRITE_PEEK 0x00005cfc
+#define FTQ_RCVDATA_COMP_CTL 0x00005d00
+#define FTQ_RCVDATA_COMP_FULL_CNT 0x00005d04
+#define FTQ_RCVDATA_COMP_FIFO_ENQDEQ 0x00005d08
+#define FTQ_RCVDATA_COMP_WRITE_PEEK 0x00005d0c
+#define FTQ_SWTYPE2_CTL 0x00005d10
+#define FTQ_SWTYPE2_FULL_CNT 0x00005d14
+#define FTQ_SWTYPE2_FIFO_ENQDEQ 0x00005d18
+#define FTQ_SWTYPE2_WRITE_PEEK 0x00005d1c
+/* 0x5d20 --> 0x6000 unused */
+
+/* Message signaled interrupt registers */
+#define MSGINT_MODE 0x00006000
+#define MSGINT_MODE_RESET 0x00000001
+#define MSGINT_MODE_ENABLE 0x00000002
+#define MSGINT_STATUS 0x00006004
+#define MSGINT_FIFO 0x00006008
+/* 0x600c --> 0x6400 unused */
+
+/* DMA completion registers */
+#define DMAC_MODE 0x00006400
+#define DMAC_MODE_RESET 0x00000001
+#define DMAC_MODE_ENABLE 0x00000002
+/* 0x6404 --> 0x6800 unused */
+
+/* GRC registers */
+#define GRC_MODE 0x00006800
+#define GRC_MODE_UPD_ON_COAL 0x00000001
+#define GRC_MODE_BSWAP_NONFRM_DATA 0x00000002
+#define GRC_MODE_WSWAP_NONFRM_DATA 0x00000004
+#define GRC_MODE_BSWAP_DATA 0x00000010
+#define GRC_MODE_WSWAP_DATA 0x00000020
+#define GRC_MODE_SPLITHDR 0x00000100
+#define GRC_MODE_NOFRM_CRACKING 0x00000200
+#define GRC_MODE_INCL_CRC 0x00000400
+#define GRC_MODE_ALLOW_BAD_FRMS 0x00000800
+#define GRC_MODE_NOIRQ_ON_SENDS 0x00002000
+#define GRC_MODE_NOIRQ_ON_RCV 0x00004000
+#define GRC_MODE_FORCE_PCI32BIT 0x00008000
+#define GRC_MODE_HOST_STACKUP 0x00010000
+#define GRC_MODE_HOST_SENDBDS 0x00020000
+#define GRC_MODE_NO_TX_PHDR_CSUM 0x00100000
+#define GRC_MODE_NO_RX_PHDR_CSUM 0x00800000
+#define GRC_MODE_IRQ_ON_TX_CPU_ATTN 0x01000000
+#define GRC_MODE_IRQ_ON_RX_CPU_ATTN 0x02000000
+#define GRC_MODE_IRQ_ON_MAC_ATTN 0x04000000
+#define GRC_MODE_IRQ_ON_DMA_ATTN 0x08000000
+#define GRC_MODE_IRQ_ON_FLOW_ATTN 0x10000000
+#define GRC_MODE_4X_NIC_SEND_RINGS 0x20000000
+#define GRC_MODE_MCAST_FRM_ENABLE 0x40000000
+#define GRC_MISC_CFG 0x00006804
+#define GRC_MISC_CFG_CORECLK_RESET 0x00000001
+#define GRC_MISC_CFG_PRESCALAR_MASK 0x000000fe
+#define GRC_MISC_CFG_PRESCALAR_SHIFT 1
+#define GRC_MISC_CFG_BOARD_ID_MASK 0x0001e000
+#define GRC_MISC_CFG_BOARD_ID_5700 0x0001e000
+#define GRC_MISC_CFG_BOARD_ID_5701 0x00000000
+#define GRC_MISC_CFG_BOARD_ID_5702FE 0x00004000
+#define GRC_MISC_CFG_BOARD_ID_5703 0x00000000
+#define GRC_MISC_CFG_BOARD_ID_5703S 0x00002000
+#define GRC_MISC_CFG_BOARD_ID_5704 0x00000000
+#define GRC_MISC_CFG_BOARD_ID_5704CIOBE 0x00004000
+#define GRC_MISC_CFG_BOARD_ID_5704_A2 0x00008000
+#define GRC_MISC_CFG_BOARD_ID_5704_X 0x0000C000
+#define GRC_MISC_CFG_BOARD_ID_AC91002A1 0x00018000
+#define GRC_LOCAL_CTRL 0x00006808
+#define GRC_LCLCTRL_INT_ACTIVE 0x00000001
+#define GRC_LCLCTRL_CLEARINT 0x00000002
+#define GRC_LCLCTRL_SETINT 0x00000004
+#define GRC_LCLCTRL_INT_ON_ATTN 0x00000008
+#define GRC_LCLCTRL_GPIO_INPUT0 0x00000100
+#define GRC_LCLCTRL_GPIO_INPUT1 0x00000200
+#define GRC_LCLCTRL_GPIO_INPUT2 0x00000400
+#define GRC_LCLCTRL_GPIO_OE0 0x00000800
+#define GRC_LCLCTRL_GPIO_OE1 0x00001000
+#define GRC_LCLCTRL_GPIO_OE2 0x00002000
+#define GRC_LCLCTRL_GPIO_OUTPUT0 0x00004000
+#define GRC_LCLCTRL_GPIO_OUTPUT1 0x00008000
+#define GRC_LCLCTRL_GPIO_OUTPUT2 0x00010000
+#define GRC_LCLCTRL_EXTMEM_ENABLE 0x00020000
+#define GRC_LCLCTRL_MEMSZ_MASK 0x001c0000
+#define GRC_LCLCTRL_MEMSZ_256K 0x00000000
+#define GRC_LCLCTRL_MEMSZ_512K 0x00040000
+#define GRC_LCLCTRL_MEMSZ_1M 0x00080000
+#define GRC_LCLCTRL_MEMSZ_2M 0x000c0000
+#define GRC_LCLCTRL_MEMSZ_4M 0x00100000
+#define GRC_LCLCTRL_MEMSZ_8M 0x00140000
+#define GRC_LCLCTRL_MEMSZ_16M 0x00180000
+#define GRC_LCLCTRL_BANK_SELECT 0x00200000
+#define GRC_LCLCTRL_SSRAM_TYPE 0x00400000
+#define GRC_LCLCTRL_AUTO_SEEPROM 0x01000000
+#define GRC_TIMER 0x0000680c
+#define GRC_RX_CPU_EVENT 0x00006810
+#define GRC_RX_TIMER_REF 0x00006814
+#define GRC_RX_CPU_SEM 0x00006818
+#define GRC_REMOTE_RX_CPU_ATTN 0x0000681c
+#define GRC_TX_CPU_EVENT 0x00006820
+#define GRC_TX_TIMER_REF 0x00006824
+#define GRC_TX_CPU_SEM 0x00006828
+#define GRC_REMOTE_TX_CPU_ATTN 0x0000682c
+#define GRC_MEM_POWER_UP 0x00006830 /* 64-bit */
+#define GRC_EEPROM_ADDR 0x00006838
+#define EEPROM_ADDR_WRITE 0x00000000
+#define EEPROM_ADDR_READ 0x80000000
+#define EEPROM_ADDR_COMPLETE 0x40000000
+#define EEPROM_ADDR_FSM_RESET 0x20000000
+#define EEPROM_ADDR_DEVID_MASK 0x1c000000
+#define EEPROM_ADDR_DEVID_SHIFT 26
+#define EEPROM_ADDR_START 0x02000000
+#define EEPROM_ADDR_CLKPERD_SHIFT 16
+#define EEPROM_ADDR_ADDR_MASK 0x0000ffff
+#define EEPROM_ADDR_ADDR_SHIFT 0
+#define EEPROM_DEFAULT_CLOCK_PERIOD 0x60
+#define EEPROM_CHIP_SIZE (64 * 1024)
+#define GRC_EEPROM_DATA 0x0000683c
+#define GRC_EEPROM_CTRL 0x00006840
+#define GRC_MDI_CTRL 0x00006844
+#define GRC_SEEPROM_DELAY 0x00006848
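+/* Illustrative sketch (not part of the original header) of an EEPROM word
+ * read via GRC_EEPROM_ADDR/GRC_EEPROM_DATA; tr32()/tw32() are assumed
+ * accessor names:
+ *
+ *   tw32(GRC_EEPROM_ADDR,
+ *        ((offset << EEPROM_ADDR_ADDR_SHIFT) & EEPROM_ADDR_ADDR_MASK) |
+ *        EEPROM_ADDR_READ | EEPROM_ADDR_START);
+ *   while (!(tr32(GRC_EEPROM_ADDR) & EEPROM_ADDR_COMPLETE))
+ *           udelay(100);
+ *   val = tr32(GRC_EEPROM_DATA);
+ */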
+/* 0x684c --> 0x6c00 unused */
+
+/* 0x6c00 --> 0x7000 unused */
+
+/* NVRAM Control registers */
+#define NVRAM_CMD 0x00007000
+#define NVRAM_CMD_RESET 0x00000001
+#define NVRAM_CMD_DONE 0x00000008
+#define NVRAM_CMD_GO 0x00000010
+#define NVRAM_CMD_WR 0x00000020
+#define NVRAM_CMD_RD 0x00000000
+#define NVRAM_CMD_ERASE 0x00000040
+#define NVRAM_CMD_FIRST 0x00000080
+#define NVRAM_CMD_LAST 0x00000100
+#define NVRAM_STAT 0x00007004
+#define NVRAM_WRDATA 0x00007008
+#define NVRAM_ADDR 0x0000700c
+#define NVRAM_ADDR_MSK 0x00ffffff
+#define NVRAM_RDDATA 0x00007010
+#define NVRAM_CFG1 0x00007014
+#define NVRAM_CFG1_FLASHIF_ENAB 0x00000001
+#define NVRAM_CFG1_BUFFERED_MODE 0x00000002
+#define NVRAM_CFG1_PASS_THRU 0x00000004
+#define NVRAM_CFG1_BIT_BANG 0x00000008
+#define NVRAM_CFG1_COMPAT_BYPASS 0x80000000
+#define NVRAM_CFG2 0x00007018
+#define NVRAM_CFG3 0x0000701c
+#define NVRAM_SWARB 0x00007020
+#define SWARB_REQ_SET0 0x00000001
+#define SWARB_REQ_SET1 0x00000002
+#define SWARB_REQ_SET2 0x00000004
+#define SWARB_REQ_SET3 0x00000008
+#define SWARB_REQ_CLR0 0x00000010
+#define SWARB_REQ_CLR1 0x00000020
+#define SWARB_REQ_CLR2 0x00000040
+#define SWARB_REQ_CLR3 0x00000080
+#define SWARB_GNT0 0x00000100
+#define SWARB_GNT1 0x00000200
+#define SWARB_GNT2 0x00000400
+#define SWARB_GNT3 0x00000800
+#define SWARB_REQ0 0x00001000
+#define SWARB_REQ1 0x00002000
+#define SWARB_REQ2 0x00004000
+#define SWARB_REQ3 0x00008000
+#define NVRAM_BUFFERED_PAGE_SIZE 264
+#define NVRAM_BUFFERED_PAGE_POS 9
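+/* Illustrative sketch (not part of the original header) of acquiring the
+ * NVRAM software arbitration lock on behalf of requester 1; tr32()/tw32()
+ * are assumed accessor names:
+ *
+ *   tw32(NVRAM_SWARB, SWARB_REQ_SET1);
+ *   while (!(tr32(NVRAM_SWARB) & SWARB_GNT1))
+ *           udelay(20);                          poll for the grant
+ *   ... access NVRAM ...
+ *   tw32(NVRAM_SWARB, SWARB_REQ_CLR1);           release the lock
+ */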
+/* 0x7024 --> 0x7400 unused */
+
+/* 0x7400 --> 0x8000 unused */
+
+/* 32K Window into NIC internal memory */
+#define NIC_SRAM_WIN_BASE 0x00008000
+
+/* Offsets into first 32k of NIC internal memory. */
+#define NIC_SRAM_PAGE_ZERO 0x00000000
+#define NIC_SRAM_SEND_RCB 0x00000100 /* 16 * TG3_BDINFO_... */
+#define NIC_SRAM_RCV_RET_RCB 0x00000200 /* 16 * TG3_BDINFO_... */
+#define NIC_SRAM_STATS_BLK 0x00000300
+#define NIC_SRAM_STATUS_BLK 0x00000b00
+
+#define NIC_SRAM_FIRMWARE_MBOX 0x00000b50
+#define NIC_SRAM_FIRMWARE_MBOX_MAGIC1 0x4B657654
+#define NIC_SRAM_FIRMWARE_MBOX_MAGIC2 0x4861764b /* !dma on linkchg */
+
+#define NIC_SRAM_DATA_SIG 0x00000b54
+#define NIC_SRAM_DATA_SIG_MAGIC 0x4b657654 /* ascii for 'KevT' */
+
+#define NIC_SRAM_DATA_CFG 0x00000b58
+#define NIC_SRAM_DATA_CFG_LED_MODE_MASK 0x0000000c
+#define NIC_SRAM_DATA_CFG_LED_MODE_UNKNOWN 0x00000000
+#define NIC_SRAM_DATA_CFG_LED_TRIPLE_SPD 0x00000004
+#define NIC_SRAM_DATA_CFG_LED_OPEN_DRAIN 0x00000004
+#define NIC_SRAM_DATA_CFG_LED_LINK_SPD 0x00000008
+#define NIC_SRAM_DATA_CFG_LED_OUTPUT 0x00000008
+#define NIC_SRAM_DATA_CFG_PHY_TYPE_MASK 0x00000030
+#define NIC_SRAM_DATA_CFG_PHY_TYPE_UNKNOWN 0x00000000
+#define NIC_SRAM_DATA_CFG_PHY_TYPE_COPPER 0x00000010
+#define NIC_SRAM_DATA_CFG_PHY_TYPE_FIBER 0x00000020
+#define NIC_SRAM_DATA_CFG_WOL_ENABLE 0x00000040
+#define NIC_SRAM_DATA_CFG_ASF_ENABLE 0x00000080
+#define NIC_SRAM_DATA_CFG_EEPROM_WP 0x00000100
+#define NIC_SRAM_DATA_CFG_FIBER_WOL 0x00004000
+
+#define NIC_SRAM_DATA_PHY_ID 0x00000b74
+#define NIC_SRAM_DATA_PHY_ID1_MASK 0xffff0000
+#define NIC_SRAM_DATA_PHY_ID2_MASK 0x0000ffff
+
+#define NIC_SRAM_FW_CMD_MBOX 0x00000b78
+#define FWCMD_NICDRV_ALIVE 0x00000001
+#define FWCMD_NICDRV_PAUSE_FW 0x00000002
+#define FWCMD_NICDRV_IPV4ADDR_CHG 0x00000003
+#define FWCMD_NICDRV_IPV6ADDR_CHG 0x00000004
+#define FWCMD_NICDRV_FIX_DMAR 0x00000005
+#define FWCMD_NICDRV_FIX_DMAW 0x00000006
+#define NIC_SRAM_FW_CMD_LEN_MBOX 0x00000b7c
+#define NIC_SRAM_FW_CMD_DATA_MBOX 0x00000b80
+#define NIC_SRAM_FW_ASF_STATUS_MBOX 0x00000c00
+#define NIC_SRAM_FW_DRV_STATE_MBOX 0x00000c04
+#define DRV_STATE_START 0x00000001
+#define DRV_STATE_UNLOAD 0x00000002
+#define DRV_STATE_WOL 0x00000003
+#define DRV_STATE_SUSPEND 0x00000004
+
+#define NIC_SRAM_FW_RESET_TYPE_MBOX 0x00000c08
+
+#define NIC_SRAM_MAC_ADDR_HIGH_MBOX 0x00000c14
+#define NIC_SRAM_MAC_ADDR_LOW_MBOX 0x00000c18
+
+#define NIC_SRAM_RX_MINI_BUFFER_DESC 0x00001000
+
+#define NIC_SRAM_DMA_DESC_POOL_BASE 0x00002000
+#define NIC_SRAM_DMA_DESC_POOL_SIZE 0x00002000
+#define NIC_SRAM_TX_BUFFER_DESC 0x00004000 /* 512 entries */
+#define NIC_SRAM_RX_BUFFER_DESC 0x00006000 /* 256 entries */
+#define NIC_SRAM_RX_JUMBO_BUFFER_DESC 0x00007000 /* 256 entries */
+#define NIC_SRAM_MBUF_POOL_BASE 0x00008000
+#define NIC_SRAM_MBUF_POOL_SIZE96 0x00018000
+#define NIC_SRAM_MBUF_POOL_SIZE64 0x00010000
+
+/* Currently the PHY address is fixed. */
+#define PHY_ADDR 0x01
+
+/* Tigon3 specific PHY MII registers. */
+#define TG3_BMCR_SPEED1000 0x0040
+
+#define MII_TG3_CTRL 0x09 /* 1000BASE-T control register */
+#define MII_TG3_CTRL_ADV_1000_HALF 0x0100
+#define MII_TG3_CTRL_ADV_1000_FULL 0x0200
+#define MII_TG3_CTRL_AS_MASTER 0x0800
+#define MII_TG3_CTRL_ENABLE_AS_MASTER 0x1000
+
+#define MII_TG3_EXT_CTRL 0x10 /* Extended control register */
+#define MII_TG3_EXT_CTRL_LNK3_LED_MODE 0x0002
+#define MII_TG3_EXT_CTRL_TBI 0x8000
+
+#define MII_TG3_EXT_STAT 0x11 /* Extended status register */
+#define MII_TG3_EXT_STAT_LPASS 0x0100
+
+#define MII_TG3_DSP_RW_PORT 0x15 /* DSP coefficient read/write port */
+
+#define MII_TG3_DSP_ADDRESS 0x17 /* DSP address register */
+
+#define MII_TG3_AUX_CTRL 0x18 /* auxiliary control register */
+
+#define MII_TG3_AUX_STAT 0x19 /* auxiliary status register */
+#define MII_TG3_AUX_STAT_LPASS 0x0004
+#define MII_TG3_AUX_STAT_SPDMASK 0x0700
+#define MII_TG3_AUX_STAT_10HALF 0x0100
+#define MII_TG3_AUX_STAT_10FULL 0x0200
+#define MII_TG3_AUX_STAT_100HALF 0x0300
+#define MII_TG3_AUX_STAT_100_4 0x0400
+#define MII_TG3_AUX_STAT_100FULL 0x0500
+#define MII_TG3_AUX_STAT_1000HALF 0x0600
+#define MII_TG3_AUX_STAT_1000FULL 0x0700
+
+#define MII_TG3_ISTAT 0x1a /* IRQ status register */
+#define MII_TG3_IMASK 0x1b /* IRQ mask register */
+
+/* ISTAT/IMASK event bits */
+#define MII_TG3_INT_LINKCHG 0x0002
+#define MII_TG3_INT_SPEEDCHG 0x0004
+#define MII_TG3_INT_DUPLEXCHG 0x0008
+#define MII_TG3_INT_ANEG_PAGE_RX 0x0400
+
+/* XXX Add this to mii.h */
+#ifndef ADVERTISE_PAUSE
+#define ADVERTISE_PAUSE_CAP 0x0400
+#endif
+#ifndef ADVERTISE_PAUSE_ASYM
+#define ADVERTISE_PAUSE_ASYM 0x0800
+#endif
+#ifndef LPA_PAUSE
+#define LPA_PAUSE_CAP 0x0400
+#endif
+#ifndef LPA_PAUSE_ASYM
+#define LPA_PAUSE_ASYM 0x0800
+#endif
+
+/* There are two ways to manage the TX descriptors on the tigon3.
+ * Either the descriptors are in host DMA'able memory, or they
+ * exist only in the card's on-chip SRAM. All 16 send BD rings use
+ * the same mode; they may not be configured individually.
+ *
+ * The mode we use is controlled by TG3_FLAG_HOST_TXDS in tp->tg3_flags.
+ *
+ * To use host memory TX descriptors:
+ * 1) Set GRC_MODE_HOST_SENDBDS in GRC_MODE register.
+ * Make sure GRC_MODE_4X_NIC_SEND_RINGS is clear.
+ * 2) Allocate DMA'able memory.
+ * 3) In NIC_SRAM_SEND_RCB (of desired index) of on-chip SRAM:
+ * a) Set TG3_BDINFO_HOST_ADDR to DMA address of memory
+ * obtained in step 2
+ * b) Set TG3_BDINFO_NIC_ADDR to NIC_SRAM_TX_BUFFER_DESC.
+ * c) Set len field of TG3_BDINFO_MAXLEN_FLAGS to number
+ * of TX descriptors. Leave flags field clear.
+ * 4) Access TX descriptors via host memory. The chip
+ * will refetch into local SRAM as needed when producer
+ * index mailboxes are updated.
+ *
+ * To use on-chip TX descriptors:
+ * 1) Set GRC_MODE_4X_NIC_SEND_RINGS in GRC_MODE register.
+ * Make sure GRC_MODE_HOST_SENDBDS is clear.
+ * 2) In NIC_SRAM_SEND_RCB (of desired index) of on-chip SRAM:
+ * a) Set TG3_BDINFO_HOST_ADDR to zero.
+ * b) Set TG3_BDINFO_NIC_ADDR to NIC_SRAM_TX_BUFFER_DESC
+ * c) TG3_BDINFO_MAXLEN_FLAGS is a don't-care.
+ * 3) Access TX descriptors directly in on-chip SRAM
+ * using normal {read,write}l() (and not pointer
+ * dereferencing of ioremap()'d memory, as the broken
+ * Broadcom driver does).
+ *
+ * Note that BDINFO_FLAGS_DISABLED should be set in the flags field of
+ * TG3_BDINFO_MAXLEN_FLAGS of all unused SEND_RCB indices.
+ */
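+/* Illustrative sketch (not part of the original header) of the host-memory
+ * setup steps above; tw32() and tg3_write_mem() are assumed accessor names,
+ * and TG3_BDINFO_HOST_ADDR/NIC_ADDR/MAXLEN_FLAGS are the RCB field offsets
+ * referred to in the comments:
+ *
+ *   tw32(GRC_MODE, (grc_mode | GRC_MODE_HOST_SENDBDS) &
+ *                  ~GRC_MODE_4X_NIC_SEND_RINGS);              step 1
+ *   tg3_write_mem(tp, NIC_SRAM_SEND_RCB + TG3_BDINFO_HOST_ADDR,
+ *                 tx_desc_dma_addr);                          step 3a
+ *   tg3_write_mem(tp, NIC_SRAM_SEND_RCB + TG3_BDINFO_NIC_ADDR,
+ *                 NIC_SRAM_TX_BUFFER_DESC);                   step 3b
+ *   tg3_write_mem(tp, NIC_SRAM_SEND_RCB + TG3_BDINFO_MAXLEN_FLAGS,
+ *                 number of TX descriptors in the len field,
+ *                 flags field clear);                         step 3c
+ */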
+struct tg3_tx_buffer_desc {
+ u32 addr_hi;
+ u32 addr_lo;
+
+ u32 len_flags;
+#define TXD_FLAG_TCPUDP_CSUM 0x0001
+#define TXD_FLAG_IP_CSUM 0x0002
+#define TXD_FLAG_END 0x0004
+#define TXD_FLAG_IP_FRAG 0x0008
+#define TXD_FLAG_IP_FRAG_END 0x0010
+#define TXD_FLAG_VLAN 0x0040
+#define TXD_FLAG_COAL_NOW 0x0080
+#define TXD_FLAG_CPU_PRE_DMA 0x0100
+#define TXD_FLAG_CPU_POST_DMA 0x0200
+#define TXD_FLAG_ADD_SRC_ADDR 0x1000
+#define TXD_FLAG_CHOOSE_SRC_ADDR 0x6000
+#define TXD_FLAG_NO_CRC 0x8000
+#define TXD_LEN_SHIFT 16
+
+ u32 vlan_tag;
+#define TXD_VLAN_TAG_SHIFT 0
+#define TXD_MSS_SHIFT 16
+};
+
+#define TXD_ADDR 0x00UL /* 64-bit */
+#define TXD_LEN_FLAGS 0x08UL /* 32-bit (upper 16-bits are len) */
+#define TXD_VLAN_TAG 0x0cUL /* 32-bit (upper 16-bits are tag) */
+#define TXD_SIZE 0x10UL
+
+struct tg3_rx_buffer_desc {
+ u32 addr_hi;
+ u32 addr_lo;
+
+ u32 idx_len;
+#define RXD_IDX_MASK 0xffff0000
+#define RXD_IDX_SHIFT 16
+#define RXD_LEN_MASK 0x0000ffff
+#define RXD_LEN_SHIFT 0
+
+ u32 type_flags;
+#define RXD_TYPE_SHIFT 16
+#define RXD_FLAGS_SHIFT 0
+
+#define RXD_FLAG_END 0x0004
+#define RXD_FLAG_MINI 0x0800
+#define RXD_FLAG_JUMBO 0x0020
+#define RXD_FLAG_VLAN 0x0040
+#define RXD_FLAG_ERROR 0x0400
+#define RXD_FLAG_IP_CSUM 0x1000
+#define RXD_FLAG_TCPUDP_CSUM 0x2000
+#define RXD_FLAG_IS_TCP 0x4000
+
+ u32 ip_tcp_csum;
+#define RXD_IPCSUM_MASK 0xffff0000
+#define RXD_IPCSUM_SHIFT 16
+#define RXD_TCPCSUM_MASK 0x0000ffff
+#define RXD_TCPCSUM_SHIFT 0
+
+ u32 err_vlan;
+
+#define RXD_VLAN_MASK 0x0000ffff
+
+#define RXD_ERR_BAD_CRC 0x00010000
+#define RXD_ERR_COLLISION 0x00020000
+#define RXD_ERR_LINK_LOST 0x00040000
+#define RXD_ERR_PHY_DECODE 0x00080000
+#define RXD_ERR_ODD_NIBBLE_RCVD_MII 0x00100000
+#define RXD_ERR_MAC_ABRT 0x00200000
+#define RXD_ERR_TOO_SMALL 0x00400000
+#define RXD_ERR_NO_RESOURCES 0x00800000
+#define RXD_ERR_HUGE_FRAME 0x01000000
+#define RXD_ERR_MASK 0xffff0000
+
+ u32 reserved;
+ u32 opaque;
+#define RXD_OPAQUE_INDEX_MASK 0x0000ffff
+#define RXD_OPAQUE_INDEX_SHIFT 0
+#define RXD_OPAQUE_RING_STD 0x00010000
+#define RXD_OPAQUE_RING_JUMBO 0x00020000
+#define RXD_OPAQUE_RING_MINI 0x00040000
+#define RXD_OPAQUE_RING_MASK 0x00070000
+};
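+/* Illustrative note (not from the original header): the driver writes the
+ * 'opaque' cookie into each buffer descriptor it posts and the chip echoes
+ * it back in the receive return ring, so a completion can be matched to
+ * its producer ring and slot:
+ *
+ *   ring = desc->opaque & RXD_OPAQUE_RING_MASK;    STD, JUMBO or MINI
+ *   idx  = desc->opaque & RXD_OPAQUE_INDEX_MASK;   slot within that ring
+ */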
+
+struct tg3_ext_rx_buffer_desc {
+ struct {
+ u32 addr_hi;
+ u32 addr_lo;
+ } addrlist[3];
+ u32 len2_len1;
+ u32 resv_len3;
+ struct tg3_rx_buffer_desc std;
+};
+
+/* We only use this when testing out the DMA engine
+ * at probe time. This is the internal format of buffer
+ * descriptors used by the chip at NIC_SRAM_DMA_DESCS.
+ */
+struct tg3_internal_buffer_desc {
+ u32 addr_hi;
+ u32 addr_lo;
+ u32 nic_mbuf;
+ /* XXX FIX THIS */
+#ifdef __BIG_ENDIAN
+ u16 cqid_sqid;
+ u16 len;
+#else
+ u16 len;
+ u16 cqid_sqid;
+#endif
+ u32 flags;
+ u32 __cookie1;
+ u32 __cookie2;
+ u32 __cookie3;
+};
+
+#define TG3_HW_STATUS_SIZE 0x50
+struct tg3_hw_status {
+ u32 status;
+#define SD_STATUS_UPDATED 0x00000001
+#define SD_STATUS_LINK_CHG 0x00000002
+#define SD_STATUS_ERROR 0x00000004
+
+ u32 status_tag;
+
+#ifdef __BIG_ENDIAN
+ u16 rx_consumer;
+ u16 rx_jumbo_consumer;
+#else
+ u16 rx_jumbo_consumer;
+ u16 rx_consumer;
+#endif
+
+#ifdef __BIG_ENDIAN
+ u16 reserved;
+ u16 rx_mini_consumer;
+#else
+ u16 rx_mini_consumer;
+ u16 reserved;
+#endif
+ struct {
+#ifdef __BIG_ENDIAN
+ u16 tx_consumer;
+ u16 rx_producer;
+#else
+ u16 rx_producer;
+ u16 tx_consumer;
+#endif
+ } idx[16];
+};
+
+typedef struct {
+ u32 high, low;
+} tg3_stat64_t;
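+/* Illustrative note (not from the original header): the chip maintains each
+ * statistic as a high/low pair of 32-bit words; a host u64 is formed as:
+ *
+ *   u64 v = ((u64) s.high << 32) | s.low;
+ */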
+
+struct tg3_hw_stats {
+ u8 __reserved0[0x400-0x300];
+
+ /* Statistics maintained by Receive MAC. */
+ tg3_stat64_t rx_octets;
+ u64 __reserved1;
+ tg3_stat64_t rx_fragments;
+ tg3_stat64_t rx_ucast_packets;
+ tg3_stat64_t rx_mcast_packets;
+ tg3_stat64_t rx_bcast_packets;
+ tg3_stat64_t rx_fcs_errors;
+ tg3_stat64_t rx_align_errors;
+ tg3_stat64_t rx_xon_pause_rcvd;
+ tg3_stat64_t rx_xoff_pause_rcvd;
+ tg3_stat64_t rx_mac_ctrl_rcvd;
+ tg3_stat64_t rx_xoff_entered;
+ tg3_stat64_t rx_frame_too_long_errors;
+ tg3_stat64_t rx_jabbers;
+ tg3_stat64_t rx_undersize_packets;
+ tg3_stat64_t rx_in_length_errors;
+ tg3_stat64_t rx_out_length_errors;
+ tg3_stat64_t rx_64_or_less_octet_packets;
+ tg3_stat64_t rx_65_to_127_octet_packets;
+ tg3_stat64_t rx_128_to_255_octet_packets;
+ tg3_stat64_t rx_256_to_511_octet_packets;
+ tg3_stat64_t rx_512_to_1023_octet_packets;
+ tg3_stat64_t rx_1024_to_1522_octet_packets;
+ tg3_stat64_t rx_1523_to_2047_octet_packets;
+ tg3_stat64_t rx_2048_to_4095_octet_packets;
+ tg3_stat64_t rx_4096_to_8191_octet_packets;
+ tg3_stat64_t rx_8192_to_9022_octet_packets;
+
+ u64 __unused0[37];
+
+ /* Statistics maintained by Transmit MAC. */
+ tg3_stat64_t tx_octets;
+ u64 __reserved2;
+ tg3_stat64_t tx_collisions;
+ tg3_stat64_t tx_xon_sent;
+ tg3_stat64_t tx_xoff_sent;
+ tg3_stat64_t tx_flow_control;
+ tg3_stat64_t tx_mac_errors;
+ tg3_stat64_t tx_single_collisions;
+ tg3_stat64_t tx_mult_collisions;
+ tg3_stat64_t tx_deferred;
+ u64 __reserved3;
+ tg3_stat64_t tx_excessive_collisions;
+ tg3_stat64_t tx_late_collisions;
+ tg3_stat64_t tx_collide_2times;
+ tg3_stat64_t tx_collide_3times;
+ tg3_stat64_t tx_collide_4times;
+ tg3_stat64_t tx_collide_5times;
+ tg3_stat64_t tx_collide_6times;
+ tg3_stat64_t tx_collide_7times;
+ tg3_stat64_t tx_collide_8times;
+ tg3_stat64_t tx_collide_9times;
+ tg3_stat64_t tx_collide_10times;
+ tg3_stat64_t tx_collide_11times;
+ tg3_stat64_t tx_collide_12times;
+ tg3_stat64_t tx_collide_13times;
+ tg3_stat64_t tx_collide_14times;
+ tg3_stat64_t tx_collide_15times;
+ tg3_stat64_t tx_ucast_packets;
+ tg3_stat64_t tx_mcast_packets;
+ tg3_stat64_t tx_bcast_packets;
+ tg3_stat64_t tx_carrier_sense_errors;
+ tg3_stat64_t tx_discards;
+ tg3_stat64_t tx_errors;
+
+ u64 __unused1[31];
+
+ /* Statistics maintained by Receive List Placement. */
+ tg3_stat64_t COS_rx_packets[16];
+ tg3_stat64_t COS_rx_filter_dropped;
+ tg3_stat64_t dma_writeq_full;
+ tg3_stat64_t dma_write_prioq_full;
+ tg3_stat64_t rxbds_empty;
+ tg3_stat64_t rx_discards;
+ tg3_stat64_t rx_errors;
+ tg3_stat64_t rx_threshold_hit;
+
+ u64 __unused2[9];
+
+ /* Statistics maintained by Send Data Initiator. */
+ tg3_stat64_t COS_out_packets[16];
+ tg3_stat64_t dma_readq_full;
+ tg3_stat64_t dma_read_prioq_full;
+ tg3_stat64_t tx_comp_queue_full;
+
+ /* Statistics maintained by Host Coalescing. */
+ tg3_stat64_t ring_set_send_prod_index;
+ tg3_stat64_t ring_status_update;
+ tg3_stat64_t nic_irqs;
+ tg3_stat64_t nic_avoided_irqs;
+ tg3_stat64_t nic_tx_threshold_hit;
+
+ u8 __reserved4[0xb00-0x9c0];
+};
+
+enum phy_led_mode {
+ led_mode_auto,
+ led_mode_three_link,
+ led_mode_link10
+};
+
+/* 'mapping' is superfluous, as the chip does not write into
+ * the tx/rx post rings, so we could just fetch it from there.
+ * But the cache behavior is better the way we are doing it now.
+ */
+struct ring_info {
+ struct sk_buff *skb;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+struct tx_ring_info {
+ struct sk_buff *skb;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+ u32 prev_vlan_tag;
+};
+
+struct tg3_config_info {
+ u32 flags;
+};
+
+struct tg3_link_config {
+ /* Describes what we're trying to get. */
+ u32 advertising;
+ u16 speed;
+ u8 duplex;
+ u8 autoneg;
+
+ /* Describes what we actually have. */
+ u16 active_speed;
+ u8 active_duplex;
+#define SPEED_INVALID 0xffff
+#define DUPLEX_INVALID 0xff
+#define AUTONEG_INVALID 0xff
+
+ /* When we go in and out of low power mode we need
+ * to swap with this state.
+ */
+ int phy_is_low_power;
+ u16 orig_speed;
+ u8 orig_duplex;
+ u8 orig_autoneg;
+};
+
+struct tg3_bufmgr_config {
+ u32 mbuf_read_dma_low_water;
+ u32 mbuf_mac_rx_low_water;
+ u32 mbuf_high_water;
+
+ u32 mbuf_read_dma_low_water_jumbo;
+ u32 mbuf_mac_rx_low_water_jumbo;
+ u32 mbuf_high_water_jumbo;
+
+ u32 dma_low_water;
+ u32 dma_high_water;
+};
+
+struct tg3 {
+ /* begin "general, frequently-used members" cacheline section */
+
+ /* SMP locking strategy:
+ *
+ * lock: Held during all operations except TX packet
+ * processing.
+ *
+ * tx_lock: Held during tg3_start_xmit{,_4gbug} and tg3_tx
+ *
+ * If you want to shut up all asynchronous processing you must
+ * acquire both locks, 'lock' taken before 'tx_lock'. IRQs must
+ * be disabled to take 'lock' but only softirq disabling is
+ * necessary for acquisition of 'tx_lock'.
+ */
+ spinlock_t lock;
+ spinlock_t indirect_lock;
+
+ unsigned long regs;
+ struct net_device *dev;
+ struct pci_dev *pdev;
+
+ struct tg3_hw_status *hw_status;
+ dma_addr_t status_mapping;
+
+ u32 msg_enable;
+
+ /* begin "tx thread" cacheline section */
+ u32 tx_prod;
+ u32 tx_cons;
+ u32 tx_pending;
+
+ spinlock_t tx_lock;
+
+ /* TX descs are only used if TG3_FLAG_HOST_TXDS is set. */
+ struct tg3_tx_buffer_desc *tx_ring;
+ struct tx_ring_info *tx_buffers;
+ dma_addr_t tx_desc_mapping;
+
+ /* begin "rx thread" cacheline section */
+ u32 rx_rcb_ptr;
+ u32 rx_std_ptr;
+ u32 rx_jumbo_ptr;
+ u32 rx_pending;
+ u32 rx_jumbo_pending;
+#if TG3_VLAN_TAG_USED
+ struct vlan_group *vlgrp;
+#endif
+
+ struct tg3_rx_buffer_desc *rx_std;
+ struct ring_info *rx_std_buffers;
+ dma_addr_t rx_std_mapping;
+
+ struct tg3_rx_buffer_desc *rx_jumbo;
+ struct ring_info *rx_jumbo_buffers;
+ dma_addr_t rx_jumbo_mapping;
+
+ struct tg3_rx_buffer_desc *rx_rcb;
+ dma_addr_t rx_rcb_mapping;
+
+ /* begin "everything else" cacheline(s) section */
+ struct net_device_stats net_stats;
+ struct net_device_stats net_stats_prev;
+ unsigned long phy_crc_errors;
+
+ u32 rx_offset;
+ u32 tg3_flags;
+#define TG3_FLAG_HOST_TXDS 0x00000001
+#define TG3_FLAG_TXD_MBOX_HWBUG 0x00000002
+#define TG3_FLAG_RX_CHECKSUMS 0x00000004
+#define TG3_FLAG_USE_LINKCHG_REG 0x00000008
+#define TG3_FLAG_USE_MI_INTERRUPT 0x00000010
+#define TG3_FLAG_ENABLE_ASF 0x00000020
+#define TG3_FLAG_POLL_SERDES 0x00000080
+#define TG3_FLAG_MBOX_WRITE_REORDER 0x00000100
+#define TG3_FLAG_PCIX_TARGET_HWBUG 0x00000200
+#define TG3_FLAG_WOL_SPEED_100MB 0x00000400
+#define TG3_FLAG_WOL_ENABLE 0x00000800
+#define TG3_FLAG_EEPROM_WRITE_PROT 0x00001000
+#define TG3_FLAG_NVRAM 0x00002000
+#define TG3_FLAG_NVRAM_BUFFERED 0x00004000
+#define TG3_FLAG_RX_PAUSE 0x00008000
+#define TG3_FLAG_TX_PAUSE 0x00010000
+#define TG3_FLAG_PCIX_MODE 0x00020000
+#define TG3_FLAG_PCI_HIGH_SPEED 0x00040000
+#define TG3_FLAG_PCI_32BIT 0x00080000
+#define TG3_FLAG_NO_TX_PSEUDO_CSUM 0x00100000
+#define TG3_FLAG_NO_RX_PSEUDO_CSUM 0x00200000
+#define TG3_FLAG_SERDES_WOL_CAP 0x00400000
+#define TG3_FLAG_JUMBO_ENABLE 0x00800000
+#define TG3_FLAG_10_100_ONLY 0x01000000
+#define TG3_FLAG_PAUSE_AUTONEG 0x02000000
+#define TG3_FLAG_PAUSE_RX 0x04000000
+#define TG3_FLAG_PAUSE_TX 0x08000000
+#define TG3_FLAG_BROKEN_CHECKSUMS 0x10000000
+#define TG3_FLAG_GOT_SERDES_FLOWCTL 0x20000000
+#define TG3_FLAG_SPLIT_MODE 0x40000000
+#define TG3_FLAG_INIT_COMPLETE 0x80000000
+
+ u32 split_mode_max_reqs;
+#define SPLIT_MODE_5704_MAX_REQ 3
+
+ struct timer_list timer;
+ u16 timer_counter;
+ u16 timer_multiplier;
+ u32 timer_offset;
+ u16 asf_counter;
+ u16 asf_multiplier;
+
+ struct tg3_link_config link_config;
+ struct tg3_bufmgr_config bufmgr_config;
+
+ /* cache h/w values, often passed straight to h/w */
+ u32 rx_mode;
+ u32 tx_mode;
+ u32 mac_mode;
+ u32 mi_mode;
+ u32 misc_host_ctrl;
+ u32 grc_mode;
+ u32 grc_local_ctrl;
+ u32 dma_rwctrl;
+ u32 coalesce_mode;
+
+ /* PCI block */
+ u16 pci_chip_rev_id;
+ u8 pci_cacheline_sz;
+ u8 pci_lat_timer;
+ u8 pci_hdr_type;
+ u8 pci_bist;
+ u32 pci_cfg_state[64 / sizeof(u32)];
+
+ int pm_cap;
+
+ /* PHY info */
+ u32 phy_id;
+#define PHY_ID_MASK 0xfffffff0
+#define PHY_ID_BCM5400 0x60008040
+#define PHY_ID_BCM5401 0x60008050
+#define PHY_ID_BCM5411 0x60008070
+#define PHY_ID_BCM5701 0x60008110
+#define PHY_ID_BCM5703 0x60008160
+#define PHY_ID_BCM5704 0x60008190
+#define PHY_ID_BCM8002 0x60010140
+#define PHY_ID_SERDES 0xfeedbee0
+#define PHY_ID_INVALID 0xffffffff
+#define PHY_ID_REV_MASK 0x0000000f
+#define PHY_REV_BCM5401_B0 0x1
+#define PHY_REV_BCM5401_B2 0x3
+#define PHY_REV_BCM5401_C0 0x6
+#define PHY_REV_BCM5411_X0 0x1 /* Found on Netgear GA302T */
+
+ enum phy_led_mode led_mode;
+
+ char board_part_number[24];
+
+ /* This macro assumes the passed PHY ID is already masked
+ * with PHY_ID_MASK.
+ */
+#define KNOWN_PHY_ID(X) \
+ ((X) == PHY_ID_BCM5400 || (X) == PHY_ID_BCM5401 || \
+ (X) == PHY_ID_BCM5411 || (X) == PHY_ID_BCM5701 || \
+ (X) == PHY_ID_BCM5703 || (X) == PHY_ID_BCM5704 || \
+ (X) == PHY_ID_BCM8002 || (X) == PHY_ID_SERDES)
+
+ struct tg3_hw_stats *hw_stats;
+ dma_addr_t stats_mapping;
+};
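+
+/* Illustrative sketch (not part of the original header): the lock
+ * ordering described in the SMP locking comment inside struct tg3
+ * above -- 'lock' (with IRQs disabled) taken before 'tx_lock' -- for
+ * a hypothetical helper that must quiesce all asynchronous work.
+ */
+#if 0
+static void tg3_example_quiesce(struct tg3 *tp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tp->lock, flags);	/* IRQs off for 'lock' */
+	spin_lock(&tp->tx_lock);	/* IRQs already off, plain lock suffices */
+	/* ... no IRQ, timer or TX processing can run here ... */
+	spin_unlock(&tp->tx_lock);
+	spin_unlock_irqrestore(&tp->lock, flags);
+}
+#endif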
+
+#endif /* !(_T3_H) */
diff --git a/xen/drivers/pci/Makefile b/xen/drivers/pci/Makefile
new file mode 100644
index 0000000000..1d811d45e3
--- /dev/null
+++ b/xen/drivers/pci/Makefile
@@ -0,0 +1,44 @@
+#
+# Makefile for the PCI bus specific drivers.
+#
+
+include $(BASEDIR)/Rules.mk
+
+OBJS := pci.o quirks.o compat.o names.o setup-res.o
+
+#obj-$(CONFIG_PCI) += pci.o quirks.o compat.o names.o
+#obj-$(CONFIG_PROC_FS) += proc.o
+
+#ifndef CONFIG_SPARC64
+#obj-$(CONFIG_PCI) += setup-res.o
+#endif
+
+#
+# Some architectures use the generic PCI setup functions
+#
+#obj-$(CONFIG_ALPHA) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_PARISC) += setup-bus.o
+#obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_ALL_PPC) += setup-bus.o
+#obj-$(CONFIG_DDB5476) += setup-bus.o
+#obj-$(CONFIG_SGI_IP27) += setup-irq.o
+
+#ifndef CONFIG_X86
+#obj-y += syscall.o
+#endif
+
+default: $(OBJS)
+ $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core gen-devlist classlist.h devlist.h
+
+names.o: names.c devlist.h classlist.h
+
+devlist.h classlist.h: pci.ids gen-devlist
+ ./gen-devlist <pci.ids
+
+gen-devlist: gen-devlist.c
+ $(HOSTCC) $(HOSTCFLAGS) -o gen-devlist gen-devlist.c
+
diff --git a/xen/drivers/pci/compat.c b/xen/drivers/pci/compat.c
new file mode 100644
index 0000000000..e035f860ea
--- /dev/null
+++ b/xen/drivers/pci/compat.c
@@ -0,0 +1,65 @@
+/*
+ * $Id: compat.c,v 1.1 1998/02/16 10:35:50 mj Exp $
+ *
+ * PCI Bus Services -- Functions for Backward Compatibility
+ *
+ * Copyright 1998--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/pci.h>
+
+int
+pcibios_present(void)
+{
+ return !list_empty(&pci_devices);
+}
+
+int
+pcibios_find_class(unsigned int class, unsigned short index, unsigned char *bus, unsigned char *devfn)
+{
+ const struct pci_dev *dev = NULL;
+ int cnt = 0;
+
+ while ((dev = pci_find_class(class, dev)))
+ if (index == cnt++) {
+ *bus = dev->bus->number;
+ *devfn = dev->devfn;
+ return PCIBIOS_SUCCESSFUL;
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+
+int
+pcibios_find_device(unsigned short vendor, unsigned short device, unsigned short index,
+ unsigned char *bus, unsigned char *devfn)
+{
+ const struct pci_dev *dev = NULL;
+ int cnt = 0;
+
+ while ((dev = pci_find_device(vendor, device, dev)))
+ if (index == cnt++) {
+ *bus = dev->bus->number;
+ *devfn = dev->devfn;
+ return PCIBIOS_SUCCESSFUL;
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+#define PCI_OP(rw,size,type) \
+int pcibios_##rw##_config_##size (unsigned char bus, unsigned char dev_fn, \
+ unsigned char where, unsigned type val) \
+{ \
+ struct pci_dev *dev = pci_find_slot(bus, dev_fn); \
+ if (!dev) return PCIBIOS_DEVICE_NOT_FOUND; \
+ return pci_##rw##_config_##size(dev, where, val); \
+}
+
+PCI_OP(read, byte, char *)
+PCI_OP(read, word, short *)
+PCI_OP(read, dword, int *)
+PCI_OP(write, byte, char)
+PCI_OP(write, word, short)
+PCI_OP(write, dword, int)
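+
+/* For reference (annotation, not in the original file): modulo
+ * whitespace, the first invocation above expands to
+ *
+ *	int pcibios_read_config_byte(unsigned char bus, unsigned char dev_fn,
+ *				     unsigned char where, unsigned char *val)
+ *	{
+ *		struct pci_dev *dev = pci_find_slot(bus, dev_fn);
+ *		if (!dev) return PCIBIOS_DEVICE_NOT_FOUND;
+ *		return pci_read_config_byte(dev, where, val);
+ *	}
+ */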
diff --git a/xen/drivers/pci/gen-devlist.c b/xen/drivers/pci/gen-devlist.c
new file mode 100644
index 0000000000..c0c242010e
--- /dev/null
+++ b/xen/drivers/pci/gen-devlist.c
@@ -0,0 +1,130 @@
+/*
+ * Generate devlist.h and classlist.h from the PCI ID file.
+ *
+ * (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_NAME_SIZE 79
+
+static void
+pq(FILE *f, const char *c)
+{
+ while (*c) {
+ if (*c == '"')
+ fprintf(f, "\\\"");
+ else {
+ fputc(*c, f);
+ if (*c == '?' && c[1] == '?') {
+ /* Avoid trigraphs */
+ fprintf(f, "\" \"");
+ }
+ }
+ c++;
+ }
+}
+
+int
+main(void)
+{
+ char line[1024], *c, *bra, vend[8];
+ int vendors = 0;
+ int mode = 0;
+ int lino = 0;
+ int vendor_len = 0;
+ FILE *devf, *clsf;
+
+ devf = fopen("devlist.h", "w");
+ clsf = fopen("classlist.h", "w");
+ if (!devf || !clsf) {
+ fprintf(stderr, "Cannot create output file!\n");
+ return 1;
+ }
+
+ while (fgets(line, sizeof(line)-1, stdin)) {
+ lino++;
+ if ((c = strchr(line, '\n')))
+ *c = 0;
+ if (!line[0] || line[0] == '#')
+ continue;
+ if (line[1] == ' ') {
+ if (line[0] == 'C' && strlen(line) > 4 && line[4] == ' ') {
+ vend[0] = line[2];
+ vend[1] = line[3];
+ vend[2] = 0;
+ mode = 2;
+ } else goto err;
+ }
+ else if (line[0] == '\t') {
+ if (line[1] == '\t')
+ continue;
+ switch (mode) {
+ case 1:
+ if (strlen(line) > 5 && line[5] == ' ') {
+ c = line + 5;
+ while (*c == ' ')
+ *c++ = 0;
+ if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
+ /* Too long, try cutting off long description */
+ bra = strchr(c, '[');
+ if (bra && bra > c && bra[-1] == ' ')
+ bra[-1] = 0;
+ if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
+ fprintf(stderr, "Line %d: Device name too long\n", lino);
+ fprintf(stderr, "%s\n", c);
+ return 1;
+ }
+ }
+ fprintf(devf, "\tDEVICE(%s,%s,\"", vend, line+1);
+ pq(devf, c);
+ fputs("\")\n", devf);
+ } else goto err;
+ break;
+ case 2:
+ if (strlen(line) > 3 && line[3] == ' ') {
+ c = line + 3;
+ while (*c == ' ')
+ *c++ = 0;
+ fprintf(clsf, "CLASS(%s%s, \"%s\")\n", vend, line+1, c);
+ } else goto err;
+ break;
+ default:
+ goto err;
+ }
+ } else if (strlen(line) > 4 && line[4] == ' ') {
+ c = line + 4;
+ while (*c == ' ')
+ *c++ = 0;
+ if (vendors)
+ fputs("ENDVENDOR()\n\n", devf);
+ vendors++;
+ strcpy(vend, line);
+ vendor_len = strlen(c);
+ if (vendor_len + 24 > MAX_NAME_SIZE) {
+ fprintf(stderr, "Line %d: Vendor name too long\n", lino);
+ return 1;
+ }
+ fprintf(devf, "VENDOR(%s,\"", vend);
+ pq(devf, c);
+ fputs("\")\n", devf);
+ mode = 1;
+ } else {
+ err:
+ fprintf(stderr, "Line %d: Syntax error in mode %d: %s\n", lino, mode, line);
+ return 1;
+ }
+ }
+ fputs("ENDVENDOR()\n\
+\n\
+#undef VENDOR\n\
+#undef DEVICE\n\
+#undef ENDVENDOR\n", devf);
+ fputs("\n#undef CLASS\n", clsf);
+
+ fclose(devf);
+ fclose(clsf);
+
+ return 0;
+}
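+
+/* Worked example (annotation, not in the original file): given this
+ * hypothetical pci.ids fragment on stdin
+ *
+ *	8086  Intel Corporation
+ *		1229  82557 [Ethernet Pro 100]
+ *	C 02  Network controller
+ *		00  Ethernet controller
+ *
+ * the program writes
+ *
+ *	VENDOR(8086,"Intel Corporation")
+ *		DEVICE(8086,1229,"82557 [Ethernet Pro 100]")
+ *	ENDVENDOR()
+ *
+ * into devlist.h and CLASS(0200, "Ethernet controller") into
+ * classlist.h.
+ */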
diff --git a/xen/drivers/pci/names.c b/xen/drivers/pci/names.c
new file mode 100644
index 0000000000..80674543b0
--- /dev/null
+++ b/xen/drivers/pci/names.c
@@ -0,0 +1,135 @@
+/*
+ * PCI Class and Device Name Tables
+ *
+ * Copyright 1993--1999 Drew Eckhardt, Frederic Potter,
+ * David Mosberger-Tang, Martin Mares
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#ifdef CONFIG_PCI_NAMES
+
+struct pci_device_info {
+ unsigned short device;
+ unsigned short seen;
+ const char *name;
+};
+
+struct pci_vendor_info {
+ unsigned short vendor;
+ unsigned short nr;
+ const char *name;
+ struct pci_device_info *devices;
+};
+
+/*
+ * This is ridiculous, but we want the strings in
+ * the .init section so that they don't take up
+ * real memory. Parse the same file multiple times
+ * to get all the info.
+ */
+#define VENDOR( vendor, name ) static char __vendorstr_##vendor[] __devinitdata = name;
+#define ENDVENDOR()
+#define DEVICE( vendor, device, name ) static char __devicestr_##vendor##device[] __devinitdata = name;
+#include "devlist.h"
+
+
+#define VENDOR( vendor, name ) static struct pci_device_info __devices_##vendor[] __devinitdata = {
+#define ENDVENDOR() };
+#define DEVICE( vendor, device, name ) { 0x##device, 0, __devicestr_##vendor##device },
+#include "devlist.h"
+
+static struct pci_vendor_info __devinitdata pci_vendor_list[] = {
+#define VENDOR( vendor, name ) { 0x##vendor, sizeof(__devices_##vendor) / sizeof(struct pci_device_info), __vendorstr_##vendor, __devices_##vendor },
+#define ENDVENDOR()
+#define DEVICE( vendor, device, name )
+#include "devlist.h"
+};
+
+#define VENDORS (sizeof(pci_vendor_list)/sizeof(struct pci_vendor_info))
+
+void __devinit pci_name_device(struct pci_dev *dev)
+{
+ const struct pci_vendor_info *vendor_p = pci_vendor_list;
+ int i = VENDORS;
+ char *name = dev->name;
+
+ do {
+ if (vendor_p->vendor == dev->vendor)
+ goto match_vendor;
+ vendor_p++;
+ } while (--i);
+
+ /* Couldn't find the vendor, let alone the device */
+ sprintf(name, "PCI device %04x:%04x", dev->vendor, dev->device);
+ return;
+
+ match_vendor: {
+ struct pci_device_info *device_p = vendor_p->devices;
+ int i = vendor_p->nr;
+
+ while (i > 0) {
+ if (device_p->device == dev->device)
+ goto match_device;
+ device_p++;
+ i--;
+ }
+
+ /* Ok, found the vendor, but unknown device */
+ sprintf(name, "PCI device %04x:%04x (%s)", dev->vendor, dev->device, vendor_p->name);
+ return;
+
+ /* Full match */
+ match_device: {
+ char *n = name + sprintf(name, "%s %s", vendor_p->name, device_p->name);
+ int nr = device_p->seen + 1;
+ device_p->seen = nr;
+ if (nr > 1)
+ sprintf(n, " (#%d)", nr);
+ }
+ }
+}
+
+/*
+ * Class names. Not in .init section as they are needed in runtime.
+ */
+
+static u16 pci_class_numbers[] = {
+#define CLASS(x,y) 0x##x,
+#include "classlist.h"
+};
+
+static char *pci_class_names[] = {
+#define CLASS(x,y) y,
+#include "classlist.h"
+};
+
+char *
+pci_class_name(u32 class)
+{
+ int i;
+
+ for(i=0; i<sizeof(pci_class_numbers)/sizeof(pci_class_numbers[0]); i++)
+ if (pci_class_numbers[i] == class)
+ return pci_class_names[i];
+ return NULL;
+}
+
+#else
+
+void __devinit pci_name_device(struct pci_dev *dev)
+{
+}
+
+char *
+pci_class_name(u32 class)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_PCI_NAMES */
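+
+/* Expansion sketch (annotation, not in the original file): for a
+ * devlist.h entry such as
+ *
+ *	VENDOR(8086,"Intel Corporation")
+ *		DEVICE(8086,1229,"82557")
+ *	ENDVENDOR()
+ *
+ * the three passes over devlist.h above generate, in turn:
+ *
+ *	static char __vendorstr_8086[] __devinitdata = "Intel Corporation";
+ *	static char __devicestr_80861229[] __devinitdata = "82557";
+ *
+ *	static struct pci_device_info __devices_8086[] __devinitdata = {
+ *		{ 0x1229, 0, __devicestr_80861229 },
+ *	};
+ *
+ * plus a { 0x8086, 1, __vendorstr_8086, __devices_8086 } entry in
+ * pci_vendor_list.
+ */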
+
diff --git a/xen/drivers/pci/pci.c b/xen/drivers/pci/pci.c
new file mode 100644
index 0000000000..134e3e2c83
--- /dev/null
+++ b/xen/drivers/pci/pci.c
@@ -0,0 +1,2217 @@
+/*
+ * $Id: pci.c,v 1.91 1999/01/21 13:34:01 davem Exp $
+ *
+ * PCI Bus Services, see include/linux/pci.h for further explanation.
+ *
+ * Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter,
+ * David Mosberger-Tang
+ *
+ * Copyright 1997 -- 2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+/*#include <linux/string.h>*/
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/spinlock.h>
+/*#include <linux/pm.h>*/
+/*#include <linux/kmod.h>*/ /* for hotplug_path */
+/*#include <linux/bitops.h>*/
+#include <linux/delay.h>
+#include <linux/cache.h>
+
+#include <asm/page.h>
+/*#include <asm/dma.h>*/ /* isa_dma_bridge_buggy */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+LIST_HEAD(pci_root_buses);
+LIST_HEAD(pci_devices);
+
+/**
+ * pci_find_slot - locate PCI device from a given PCI slot
+ * @bus: number of PCI bus on which desired PCI device resides
+ * @devfn: encodes number of PCI slot in which the desired PCI
+ * device resides and the logical device number within that slot
+ * in case of multi-function devices.
+ *
+ * Given a PCI bus and slot/function number, the desired PCI device
+ * is located in system global list of PCI devices. If the device
+ * is found, a pointer to its data structure is returned. If no
+ * device is found, %NULL is returned.
+ */
+struct pci_dev *
+pci_find_slot(unsigned int bus, unsigned int devfn)
+{
+ struct pci_dev *dev;
+
+ pci_for_each_dev(dev) {
+ if (dev->bus->number == bus && dev->devfn == devfn)
+ return dev;
+ }
+ return NULL;
+}
+
+/**
+ * pci_find_subsys - begin or continue searching for a PCI device by vendor/subvendor/device/subdevice id
+ * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
+ * @ss_vendor: PCI subsystem vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @ss_device: PCI subsystem device id to match, or %PCI_ANY_ID to match all device ids
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices. If a PCI device is
+ * found with a matching @vendor, @device, @ss_vendor and @ss_device, a pointer to its
+ * device structure is returned. Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from next device on the global list.
+ */
+struct pci_dev *
+pci_find_subsys(unsigned int vendor, unsigned int device,
+ unsigned int ss_vendor, unsigned int ss_device,
+ const struct pci_dev *from)
+{
+ struct list_head *n = from ? from->global_list.next : pci_devices.next;
+
+ while (n != &pci_devices) {
+ struct pci_dev *dev = pci_dev_g(n);
+ if ((vendor == PCI_ANY_ID || dev->vendor == vendor) &&
+ (device == PCI_ANY_ID || dev->device == device) &&
+ (ss_vendor == PCI_ANY_ID || dev->subsystem_vendor == ss_vendor) &&
+ (ss_device == PCI_ANY_ID || dev->subsystem_device == ss_device))
+ return dev;
+ n = n->next;
+ }
+ return NULL;
+}
+
+
+/**
+ * pci_find_device - begin or continue searching for a PCI device by vendor/device id
+ * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices. If a PCI device is
+ * found with a matching @vendor and @device, a pointer to its device structure is
+ * returned. Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from next device on the global list.
+ */
+struct pci_dev *
+pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from)
+{
+ return pci_find_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from);
+}
+
+
+/**
+ * pci_find_class - begin or continue searching for a PCI device by class
+ * @class: search for a PCI device with this class designation
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices. If a PCI device is
+ * found with a matching @class, a pointer to its device structure is
+ * returned. Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from next device
+ * on the global list.
+ */
+struct pci_dev *
+pci_find_class(unsigned int class, const struct pci_dev *from)
+{
+ struct list_head *n = from ? from->global_list.next : pci_devices.next;
+
+ while (n != &pci_devices) {
+ struct pci_dev *dev = pci_dev_g(n);
+ if (dev->class == class)
+ return dev;
+ n = n->next;
+ }
+ return NULL;
+}
+
+/**
+ * pci_find_capability - query for devices' capabilities
+ * @dev: PCI device to query
+ * @cap: capability code
+ *
+ * Tell if a device supports a given PCI capability.
+ * Returns the address of the requested capability structure within the
+ * device's PCI configuration space or 0 in case the device does not
+ * support it. Possible values for @cap:
+ *
+ * %PCI_CAP_ID_PM Power Management
+ *
+ * %PCI_CAP_ID_AGP Accelerated Graphics Port
+ *
+ * %PCI_CAP_ID_VPD Vital Product Data
+ *
+ * %PCI_CAP_ID_SLOTID Slot Identification
+ *
+ * %PCI_CAP_ID_MSI Message Signalled Interrupts
+ *
+ * %PCI_CAP_ID_CHSWP CompactPCI HotSwap
+ *
+ * %PCI_CAP_ID_PCIX PCI-X
+ */
+int
+pci_find_capability(struct pci_dev *dev, int cap)
+{
+ u16 status;
+ u8 pos, id;
+ int ttl = 48;
+
+ pci_read_config_word(dev, PCI_STATUS, &status);
+ if (!(status & PCI_STATUS_CAP_LIST))
+ return 0;
+ switch (dev->hdr_type) {
+ case PCI_HEADER_TYPE_NORMAL:
+ case PCI_HEADER_TYPE_BRIDGE:
+ pci_read_config_byte(dev, PCI_CAPABILITY_LIST, &pos);
+ break;
+ case PCI_HEADER_TYPE_CARDBUS:
+ pci_read_config_byte(dev, PCI_CB_CAPABILITY_LIST, &pos);
+ break;
+ default:
+ return 0;
+ }
+ while (ttl-- && pos >= 0x40) {
+ pos &= ~3;
+ pci_read_config_byte(dev, pos + PCI_CAP_LIST_ID, &id);
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+ pci_read_config_byte(dev, pos + PCI_CAP_LIST_NEXT, &pos);
+ }
+ return 0;
+}
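+
+/* Annotation (not in the original file): the ttl of 48 bounds the
+ * walk -- capabilities live in the 192 bytes of config space above
+ * 0x40 and each entry is dword-aligned (pos &= ~3), so at most
+ * 192 / 4 = 48 distinct entries fit; the counter guarantees
+ * termination even on a malformed, circular capability list.
+ */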
+
+
+/**
+ * pci_find_parent_resource - return resource region of parent bus of given region
+ * @dev: PCI device structure contains resources to be searched
+ * @res: child resource record for which parent is sought
+ *
+ * For the given resource region of the given device, return the
+ * resource region of the parent bus that contains the given region,
+ * or from which it should be allocated.
+ */
+struct resource *
+pci_find_parent_resource(const struct pci_dev *dev, struct resource *res)
+{
+ const struct pci_bus *bus = dev->bus;
+ int i;
+ struct resource *best = NULL;
+
+ for(i=0; i<4; i++) {
+ struct resource *r = bus->resource[i];
+ if (!r)
+ continue;
+ if (res->start && !(res->start >= r->start && res->end <= r->end))
+ continue; /* Not contained */
+ if ((res->flags ^ r->flags) & (IORESOURCE_IO | IORESOURCE_MEM))
+ continue; /* Wrong type */
+ if (!((res->flags ^ r->flags) & IORESOURCE_PREFETCH))
+ return r; /* Exact match */
+ if ((res->flags & IORESOURCE_PREFETCH) && !(r->flags & IORESOURCE_PREFETCH))
+ best = r; /* Approximating prefetchable by non-prefetchable */
+ }
+ return best;
+}
+
+/**
+ * pci_set_power_state - Set the power state of a PCI device
+ * @dev: PCI device to be suspended
+ * @state: Power state we're entering
+ *
+ * Transition a device to a new power state, using the Power Management
+ * Capabilities in the device's config space.
+ *
+ * RETURN VALUE:
+ * -EINVAL if trying to enter a shallower (higher-power) sleep state
+ * than the current one; only D0 can be entered from any state.
+ * 0 if we're already in the requested state.
+ * -EIO if device does not support PCI PM.
+ * 0 if we can successfully change the power state.
+ */
+
+int
+pci_set_power_state(struct pci_dev *dev, int state)
+{
+ int pm;
+ u16 pmcsr;
+
+ /* bound the state we're entering */
+ if (state > 3) state = 3;
+
+ /* Validate current state:
+ * Can enter D0 from any state, but otherwise we can only go
+ * deeper into sleep, never shallower
+ */
+ if (state > 0 && dev->current_state > state)
+ return -EINVAL;
+ else if (dev->current_state == state)
+ return 0; /* we're already there */
+
+ /* find PCI PM capability in list */
+ pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+
+ /* abort if the device doesn't support PM capabilities */
+ if (!pm) return -EIO;
+
+ /* check if this device supports the desired state */
+ if (state == 1 || state == 2) {
+ u16 pmc;
+ pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc);
+ if (state == 1 && !(pmc & PCI_PM_CAP_D1)) return -EIO;
+ else if (state == 2 && !(pmc & PCI_PM_CAP_D2)) return -EIO;
+ }
+
+ /* If we're in D3, force entire word to 0.
+ * This doesn't affect PME_Status, disables PME_En, and
+ * sets PowerState to 0.
+ */
+ if (dev->current_state >= 3)
+ pmcsr = 0;
+ else {
+ pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr);
+ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+ pmcsr |= state;
+ }
+
+ /* enter specified state */
+ pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr);
+
+ /* Mandatory power management transition delays */
+ /* see PCI PM 1.1 5.6.1 table 18 */
+ if(state == 3 || dev->current_state == 3)
+ {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ/100);
+ }
+ else if(state == 2 || dev->current_state == 2)
+ udelay(200);
+ dev->current_state = state;
+
+ return 0;
+}
+
+/**
+ * pci_save_state - save the PCI configuration space of a device before suspending
+ * @dev: - PCI device that we're dealing with
+ * @buffer: - buffer to hold config space context
+ *
+ * @buffer must be large enough to hold the entire PCI 2.2 config space
+ * (>= 64 bytes).
+ */
+int
+pci_save_state(struct pci_dev *dev, u32 *buffer)
+{
+ int i;
+ if (buffer) {
+ /* XXX: 100% dword access ok here? */
+ for (i = 0; i < 16; i++)
+ pci_read_config_dword(dev, i * 4,&buffer[i]);
+ }
+ return 0;
+}
+
+/**
+ * pci_restore_state - Restore the saved state of a PCI device
+ * @dev: - PCI device that we're dealing with
+ * @buffer: - saved PCI config space
+ *
+ */
+int
+pci_restore_state(struct pci_dev *dev, u32 *buffer)
+{
+ int i;
+
+ if (buffer) {
+ for (i = 0; i < 16; i++)
+ pci_write_config_dword(dev,i * 4, buffer[i]);
+ }
+ /*
+ * otherwise, write the context information we know from bootup.
+ * This works around a problem where warm-booting from Windows
+ * combined with a D3(hot)->D0 transition causes PCI config
+ * header data to be forgotten.
+ */
+ else {
+ for (i = 0; i < 6; i ++)
+ pci_write_config_dword(dev,
+ PCI_BASE_ADDRESS_0 + (i * 4),
+ dev->resource[i].start);
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+ }
+ return 0;
+}
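+
+/* Usage sketch (not part of the original file): the minimal
+ * suspend/resume pair suggested by the power-management comment
+ * further below, for a hypothetical driver that keeps a 16-dword
+ * snapshot of config space.
+ */
+#if 0
+static u32 example_cfg_space[16];
+
+static int example_suspend(struct pci_dev *dev, u32 state)
+{
+	pci_save_state(dev, example_cfg_space);
+	return pci_set_power_state(dev, 3);	/* enter D3hot */
+}
+
+static int example_resume(struct pci_dev *dev)
+{
+	pci_set_power_state(dev, 0);		/* back to full power */
+	return pci_restore_state(dev, example_cfg_space);
+}
+#endif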
+
+/**
+ * pci_enable_device_bars - Initialize some of a device for use
+ * @dev: PCI device to be initialized
+ * @bars: bitmask of BARs that must be configured
+ *
+ * Initialize device before it's used by a driver. Ask low-level code
+ * to enable selected I/O and memory resources. Wake up the device if it
+ * was suspended. Beware, this function can fail.
+ */
+
+int
+pci_enable_device_bars(struct pci_dev *dev, int bars)
+{
+ int err;
+
+ pci_set_power_state(dev, 0);
+ if ((err = pcibios_enable_device(dev, bars)) < 0)
+ return err;
+ return 0;
+}
+
+/**
+ * pci_enable_device - Initialize device before it's used by a driver.
+ * @dev: PCI device to be initialized
+ *
+ * Initialize device before it's used by a driver. Ask low-level code
+ * to enable I/O and memory. Wake up the device if it was suspended.
+ * Beware, this function can fail.
+ */
+int
+pci_enable_device(struct pci_dev *dev)
+{
+ return pci_enable_device_bars(dev, 0x3F);
+}
+
+/**
+ * pci_disable_device - Disable PCI device after use
+ * @dev: PCI device to be disabled
+ *
+ * Signal to the system that the PCI device is not in use by the system
+ * anymore. This only involves disabling PCI bus-mastering, if active.
+ */
+void
+pci_disable_device(struct pci_dev *dev)
+{
+ u16 pci_command;
+
+ pci_read_config_word(dev, PCI_COMMAND, &pci_command);
+ if (pci_command & PCI_COMMAND_MASTER) {
+ pci_command &= ~PCI_COMMAND_MASTER;
+ pci_write_config_word(dev, PCI_COMMAND, pci_command);
+ }
+}
+
+/**
+ * pci_enable_wake - enable device to generate PME# when suspended
+ * @dev: - PCI device to operate on
+ * @state: - Current state of device.
+ * @enable: - Flag to enable or disable generation
+ *
+ * Set the bits in the device's PM Capabilities to generate PME# when
+ * the system is suspended.
+ *
+ * -EIO is returned if device doesn't have PM Capabilities.
+ * -EINVAL is returned if device supports it, but can't generate wake events.
+ * 0 if operation is successful.
+ *
+ */
+int pci_enable_wake(struct pci_dev *dev, u32 state, int enable)
+{
+ int pm;
+ u16 value;
+
+ /* find PCI PM capability in list */
+ pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+
+ /* If device doesn't support PM Capabilities, but request is to disable
+ * wake events, it's a nop; otherwise fail */
+ if (!pm)
+ return enable ? -EIO : 0;
+
+ /* Check device's ability to generate PME# */
+ pci_read_config_word(dev,pm+PCI_PM_PMC,&value);
+
+ value &= PCI_PM_CAP_PME_MASK;
+ value >>= ffs(value); /* First bit of mask */
+
+ /* Check if it can generate PME# from requested state. */
+ if (!value || !(value & (1 << state)))
+ return enable ? -EINVAL : 0;
+
+ pci_read_config_word(dev, pm + PCI_PM_CTRL, &value);
+
+ /* Clear PME_Status by writing 1 to it and enable PME# */
+ value |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE;
+
+ if (!enable)
+ value &= ~PCI_PM_CTRL_PME_ENABLE;
+
+ pci_write_config_word(dev, pm + PCI_PM_CTRL, value);
+
+ return 0;
+}
+
+int
+pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
+{
+ u8 pin;
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (!pin)
+ return -1;
+ pin--;
+ while (dev->bus->self) {
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ dev = dev->bus->self;
+ }
+ *bridge = dev;
+ return pin;
+}
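+
+/* Worked example (annotation, not in the original file): a device in
+ * slot 3 behind a single PCI-to-PCI bridge, wired to INTB (the pin
+ * register reads 2, so pin = 1 after the decrement), swizzles to
+ * (1 + 3) % 4 = 0, i.e. INTA at the bridge; *bridge is set to that
+ * bridge on the root bus and 0 is returned.
+ */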
+
+/**
+ * pci_release_region - Release a PCI bar
+ * @pdev: PCI device whose resources were previously reserved by pci_request_region
+ * @bar: BAR to release
+ *
+ * Releases the PCI I/O and memory resources previously reserved by a
+ * successful call to pci_request_region. Call this function only
+ * after all use of the PCI regions has ceased.
+ */
+void pci_release_region(struct pci_dev *pdev, int bar)
+{
+ if (pci_resource_len(pdev, bar) == 0)
+ return;
+ if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
+ release_region(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar));
+ else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
+ release_mem_region(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar));
+}
+
+/**
+ * pci_request_region - Reserve a PCI I/O or memory resource
+ * @pdev: PCI device whose resources are to be reserved
+ * @bar: BAR to be reserved
+ * @res_name: Name to be associated with resource.
+ *
+ * Mark the PCI region associated with PCI device @pdev BAR @bar as
+ * being reserved by owner @res_name. Do not access any
+ * address inside the PCI regions unless this call returns
+ * successfully.
+ *
+ * Returns 0 on success, or %EBUSY on error. A warning
+ * message is also printed on failure.
+ */
+int pci_request_region(struct pci_dev *pdev, int bar, char *res_name)
+{
+ if (pci_resource_len(pdev, bar) == 0)
+ return 0;
+
+ if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
+ if (!request_region(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar), res_name))
+ goto err_out;
+ }
+ else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
+ if (!request_mem_region(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar), res_name))
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%lx@%lx for device %s\n",
+ pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
+ bar + 1, /* PCI BAR # */
+ pci_resource_len(pdev, bar), pci_resource_start(pdev, bar),
+ pdev->slot_name);
+ return -EBUSY;
+}
+
+
+/**
+ * pci_release_regions - Release reserved PCI I/O and memory resources
+ * @pdev: PCI device whose resources were previously reserved by pci_request_regions
+ *
+ * Releases all PCI I/O and memory resources previously reserved by a
+ * successful call to pci_request_regions. Call this function only
+ * after all use of the PCI regions has ceased.
+ */
+
+void pci_release_regions(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = 0; i < 6; i++)
+ pci_release_region(pdev, i);
+}
+
+/**
+ * pci_request_regions - Reserve PCI I/O and memory resources
+ * @pdev: PCI device whose resources are to be reserved
+ * @res_name: Name to be associated with resource.
+ *
+ * Mark all PCI regions associated with PCI device @pdev as
+ * being reserved by owner @res_name. Do not access any
+ * address inside the PCI regions unless this call returns
+ * successfully.
+ *
+ * Returns 0 on success, or %EBUSY on error. A warning
+ * message is also printed on failure.
+ */
+int pci_request_regions(struct pci_dev *pdev, char *res_name)
+{
+ int i;
+
+ for (i = 0; i < 6; i++)
+ if(pci_request_region(pdev, i, res_name))
+ goto err_out;
+ return 0;
+
+err_out:
+ printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%lx@%lx for device %s\n",
+ pci_resource_flags(pdev, i) & IORESOURCE_IO ? "I/O" : "mem",
+ i + 1, /* PCI BAR # */
+ pci_resource_len(pdev, i), pci_resource_start(pdev, i),
+ pdev->slot_name);
+ while(--i >= 0)
+ pci_release_region(pdev, i);
+
+ return -EBUSY;
+}
+
+
+/*
+ * Registration of PCI drivers and handling of hot-pluggable devices.
+ */
+
+static LIST_HEAD(pci_drivers);
+
+/**
+ * pci_match_device - Tell if a PCI device structure has a matching PCI device id structure
+ * @ids: array of PCI device id structures to search in
+ * @dev: the PCI device structure to match against
+ *
+ * Used by a driver to check whether a PCI device present in the
+ * system is in its list of supported devices. Returns the matching
+ * pci_device_id structure or %NULL if there is no match.
+ */
+const struct pci_device_id *
+pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev)
+{
+ while (ids->vendor || ids->subvendor || ids->class_mask) {
+ if ((ids->vendor == PCI_ANY_ID || ids->vendor == dev->vendor) &&
+ (ids->device == PCI_ANY_ID || ids->device == dev->device) &&
+ (ids->subvendor == PCI_ANY_ID || ids->subvendor == dev->subsystem_vendor) &&
+ (ids->subdevice == PCI_ANY_ID || ids->subdevice == dev->subsystem_device) &&
+ !((ids->class ^ dev->class) & ids->class_mask))
+ return ids;
+ ids++;
+ }
+ return NULL;
+}
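+
+/* Usage sketch (not part of the original file): a hypothetical match
+ * table as consumed above; the all-zero sentinel terminates the loop
+ * because vendor, subvendor and class_mask are all 0.
+ */
+#if 0
+static struct pci_device_id example_ids[] = {
+	/* vendor, device, subvendor, subdevice, class, class_mask, data */
+	{ 0x8086, 0x1229, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+	{ 0, }
+};
+#endif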
+
+static int
+pci_announce_device(struct pci_driver *drv, struct pci_dev *dev)
+{
+ const struct pci_device_id *id;
+ int ret = 0;
+
+ if (drv->id_table) {
+ id = pci_match_device(drv->id_table, dev);
+ if (!id) {
+ ret = 0;
+ goto out;
+ }
+ } else
+ id = NULL;
+
+ dev_probe_lock();
+ if (drv->probe(dev, id) >= 0) {
+ dev->driver = drv;
+ ret = 1;
+ }
+ dev_probe_unlock();
+out:
+ return ret;
+}
+
+/**
+ * pci_register_driver - register a new pci driver
+ * @drv: the driver structure to register
+ *
+ * Adds the driver structure to the list of registered drivers.
+ * Returns the number of pci devices which were claimed by the driver
+ * during registration. The driver remains registered even if the
+ * return value is zero.
+ */
+int
+pci_register_driver(struct pci_driver *drv)
+{
+ struct pci_dev *dev;
+ int count = 0;
+
+ list_add_tail(&drv->node, &pci_drivers);
+ pci_for_each_dev(dev) {
+ if (!pci_dev_driver(dev))
+ count += pci_announce_device(drv, dev);
+ }
+ return count;
+}
+
+/**
+ * pci_unregister_driver - unregister a pci driver
+ * @drv: the driver structure to unregister
+ *
+ * Deletes the driver structure from the list of registered PCI drivers,
+ * gives it a chance to clean up by calling its remove() function for
+ * each device it was responsible for, and marks those devices as
+ * driverless.
+ */
+
+void
+pci_unregister_driver(struct pci_driver *drv)
+{
+ struct pci_dev *dev;
+
+ list_del(&drv->node);
+ pci_for_each_dev(dev) {
+ if (dev->driver == drv) {
+ if (drv->remove)
+ drv->remove(dev);
+ dev->driver = NULL;
+ }
+ }
+}
+
+#ifdef CONFIG_HOTPLUG
+
+#ifndef FALSE
+#define FALSE (0)
+#define TRUE (!FALSE)
+#endif
+
+static void
+run_sbin_hotplug(struct pci_dev *pdev, int insert)
+{
+ int i;
+ char *argv[3], *envp[8];
+ char id[20], sub_id[24], bus_id[24], class_id[20];
+
+ if (!hotplug_path[0])
+ return;
+
+ sprintf(class_id, "PCI_CLASS=%04X", pdev->class);
+ sprintf(id, "PCI_ID=%04X:%04X", pdev->vendor, pdev->device);
+ sprintf(sub_id, "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor, pdev->subsystem_device);
+ sprintf(bus_id, "PCI_SLOT_NAME=%s", pdev->slot_name);
+
+ i = 0;
+ argv[i++] = hotplug_path;
+ argv[i++] = "pci";
+ argv[i] = 0;
+
+ i = 0;
+ /* minimal command environment */
+ envp[i++] = "HOME=/";
+ envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+
+ /* other stuff we want to pass to /sbin/hotplug */
+ envp[i++] = class_id;
+ envp[i++] = id;
+ envp[i++] = sub_id;
+ envp[i++] = bus_id;
+ if (insert)
+ envp[i++] = "ACTION=add";
+ else
+ envp[i++] = "ACTION=remove";
+ envp[i] = 0;
+
+ call_usermodehelper (argv [0], argv, envp);
+}
+
+/**
+ * pci_announce_device_to_drivers - tell the drivers a new device has appeared
+ * @dev: the device that has shown up
+ *
+ * Notifies the drivers that a new device has appeared, and also
+ * notifies userspace through /sbin/hotplug.
+ */
+void
+pci_announce_device_to_drivers(struct pci_dev *dev)
+{
+ struct list_head *ln;
+
+ for(ln=pci_drivers.next; ln != &pci_drivers; ln=ln->next) {
+ struct pci_driver *drv = list_entry(ln, struct pci_driver, node);
+ if (drv->remove && pci_announce_device(drv, dev))
+ break;
+ }
+
+ /* notify userspace of new hotplug device */
+ run_sbin_hotplug(dev, TRUE);
+}
+
+/**
+ * pci_insert_device - insert a hotplug device
+ * @dev: the device to insert
+ * @bus: where to insert it
+ *
+ * Add a new device to the device lists and notify userspace (/sbin/hotplug).
+ */
+void
+pci_insert_device(struct pci_dev *dev, struct pci_bus *bus)
+{
+ list_add_tail(&dev->bus_list, &bus->devices);
+ list_add_tail(&dev->global_list, &pci_devices);
+#ifdef CONFIG_PROC_FS
+ pci_proc_attach_device(dev);
+#endif
+ pci_announce_device_to_drivers(dev);
+}
+
+static void
+pci_free_resources(struct pci_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *res = dev->resource + i;
+ if (res->parent)
+ release_resource(res);
+ }
+}
+
+/**
+ * pci_remove_device - remove a hotplug device
+ * @dev: the device to remove
+ *
+ * Delete the device structure from the device lists and
+ * notify userspace (/sbin/hotplug).
+ */
+void
+pci_remove_device(struct pci_dev *dev)
+{
+ if (dev->driver) {
+ if (dev->driver->remove)
+ dev->driver->remove(dev);
+ dev->driver = NULL;
+ }
+ list_del(&dev->bus_list);
+ list_del(&dev->global_list);
+ pci_free_resources(dev);
+#ifdef CONFIG_PROC_FS
+ pci_proc_detach_device(dev);
+#endif
+
+ /* notify userspace of hotplug device removal */
+ run_sbin_hotplug(dev, FALSE);
+}
+
+#endif
+
+static struct pci_driver pci_compat_driver = {
+ name: "compat"
+};
+
+/**
+ * pci_dev_driver - get the pci_driver of a device
+ * @dev: the device to query
+ *
+ * Returns the appropriate pci_driver structure or %NULL if there is no
+ * registered driver for the device.
+ */
+struct pci_driver *
+pci_dev_driver(const struct pci_dev *dev)
+{
+ if (dev->driver)
+ return dev->driver;
+ else {
+ int i;
+ for(i=0; i<=PCI_ROM_RESOURCE; i++)
+ if (dev->resource[i].flags & IORESOURCE_BUSY)
+ return &pci_compat_driver;
+ }
+ return NULL;
+}
+
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+
+static spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Wrappers for all PCI configuration access functions. They just check
+ * alignment, do locking and call the low-level functions pointed to
+ * by pci_dev->ops.
+ */
+
+#define PCI_byte_BAD 0
+#define PCI_word_BAD (pos & 1)
+#define PCI_dword_BAD (pos & 3)
+
+#define PCI_OP(rw,size,type) \
+int pci_##rw##_config_##size (struct pci_dev *dev, int pos, type value) \
+{ \
+ int res; \
+ unsigned long flags; \
+ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ spin_lock_irqsave(&pci_lock, flags); \
+ res = dev->bus->ops->rw##_##size(dev, pos, value); \
+ spin_unlock_irqrestore(&pci_lock, flags); \
+ return res; \
+}
+
+PCI_OP(read, byte, u8 *)
+PCI_OP(read, word, u16 *)
+PCI_OP(read, dword, u32 *)
+PCI_OP(write, byte, u8)
+PCI_OP(write, word, u16)
+PCI_OP(write, dword, u32)
+
+/**
+ * pci_set_master - enables bus-mastering for device dev
+ * @dev: the PCI device to enable
+ *
+ * Enables bus-mastering on the device and calls pcibios_set_master()
+ * to do the needed arch specific settings.
+ */
+void
+pci_set_master(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (! (cmd & PCI_COMMAND_MASTER)) {
+ DBG("PCI: Enabling bus mastering for device %s\n", dev->slot_name);
+ cmd |= PCI_COMMAND_MASTER;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ pcibios_set_master(dev);
+}
+
+/**
+ * pdev_set_mwi - arch helper function for pcibios_set_mwi
+ * @dev: the PCI device for which MWI is enabled
+ *
+ * Helper function for implementing the arch-specific pcibios_set_mwi
+ * function. Originally copied from drivers/net/acenic.c.
+ * Copyright 1998-2001 by Jes Sorensen, <jes@trained-monkey.org>.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int
+pdev_set_mwi(struct pci_dev *dev)
+{
+ int rc = 0;
+ u8 cache_size;
+
+ /*
+ * Looks like this is necessary to deal with on all architectures,
+ * even this %$#%$# N440BX Intel based thing doesn't get it right.
+ * Ie. having two NICs in the machine, one will have the cache
+ * line set at boot time, the other will not.
+ */
+ pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &cache_size);
+ cache_size <<= 2;
+ if (cache_size != SMP_CACHE_BYTES) {
+ printk(KERN_WARNING "PCI: %s PCI cache line size set incorrectly (%i bytes) by BIOS/FW.\n",
+ dev->slot_name, cache_size);
+ if (cache_size > SMP_CACHE_BYTES) {
+ printk("PCI: %s cache line size too large - expecting %i.\n", dev->slot_name, SMP_CACHE_BYTES);
+ rc = -EINVAL;
+ } else {
+ printk("PCI: %s PCI cache line size corrected to %i.\n", dev->slot_name, SMP_CACHE_BYTES);
+ pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
+ SMP_CACHE_BYTES >> 2);
+ }
+ }
+
+ return rc;
+}
+
+/**
+ * pci_set_mwi - enables memory-write-invalidate PCI transaction
+ * @dev: the PCI device for which MWI is enabled
+ *
+ * Enables the Memory-Write-Invalidate transaction in %PCI_COMMAND,
+ * and then calls the arch-specific @pcibios_set_mwi (or the generic
+ * pdev_set_mwi fallback) to do the needed preparation.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int
+pci_set_mwi(struct pci_dev *dev)
+{
+ int rc;
+ u16 cmd;
+
+#ifdef HAVE_ARCH_PCI_MWI
+ rc = pcibios_set_mwi(dev);
+#else
+ rc = pdev_set_mwi(dev);
+#endif
+
+ if (rc)
+ return rc;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (! (cmd & PCI_COMMAND_INVALIDATE)) {
+ DBG("PCI: Enabling Mem-Wr-Inval for device %s\n", dev->slot_name);
+ cmd |= PCI_COMMAND_INVALIDATE;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+
+ return 0;
+}
+
+/**
+ * pci_clear_mwi - disables Memory-Write-Invalidate for device dev
+ * @dev: the PCI device to disable
+ *
+ * Disables PCI Memory-Write-Invalidate transaction on the device
+ */
+void
+pci_clear_mwi(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (cmd & PCI_COMMAND_INVALIDATE) {
+ cmd &= ~PCI_COMMAND_INVALIDATE;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+}
+
+int
+pci_set_dma_mask(struct pci_dev *dev, u64 mask)
+{
+ if (!pci_dma_supported(dev, mask))
+ return -EIO;
+
+ dev->dma_mask = mask;
+
+ return 0;
+}
+
+int
+pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask)
+{
+ if (!pci_dac_dma_supported(dev, mask))
+ return -EIO;
+
+ dev->dma_mask = mask;
+
+ return 0;
+}
+
+/*
+ * Translate the low bits of the PCI base
+ * to the resource type
+ */
+static inline unsigned int pci_calc_resource_flags(unsigned int flags)
+{
+ if (flags & PCI_BASE_ADDRESS_SPACE_IO)
+ return IORESOURCE_IO;
+
+ if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
+ return IORESOURCE_MEM | IORESOURCE_PREFETCH;
+
+ return IORESOURCE_MEM;
+}
+
+/*
+ * Find the extent of a PCI decode, do sanity checks.
+ */
+static u32 pci_size(u32 base, u32 maxbase, unsigned long mask)
+{
+ u32 size = mask & maxbase; /* Find the significant bits */
+ if (!size)
+ return 0;
+ size = size & ~(size-1); /* Get the lowest of them to find the decode size */
+ size -= 1; /* extent = size - 1 */
+ if (base == maxbase && ((base | size) & mask) != mask)
+ return 0; /* base == maxbase can be valid only
+ if the BAR has been already
+ programmed with all 1s */
+ return size;
+}
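+
+/* Worked example (annotation, not in the original file): for a 1 MB
+ * memory BAR, writing ~0 reads back 0xfff00000 in the address bits.
+ * With mask 0xfffffff0: size = 0xfff00000, its lowest set bit is
+ * 0x00100000 (the decode size), and the extent returned is
+ * 0x000fffff, so the caller sets res->end = res->start + 0xfffff.
+ */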
+
+static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
+{
+ unsigned int pos, reg, next;
+ u32 l, sz;
+ struct resource *res;
+
+ for(pos=0; pos<howmany; pos = next) {
+ next = pos+1;
+ res = &dev->resource[pos];
+ res->name = dev->name;
+ reg = PCI_BASE_ADDRESS_0 + (pos << 2);
+ pci_read_config_dword(dev, reg, &l);
+ pci_write_config_dword(dev, reg, ~0);
+ pci_read_config_dword(dev, reg, &sz);
+ pci_write_config_dword(dev, reg, l);
+ if (!sz || sz == 0xffffffff)
+ continue;
+ if (l == 0xffffffff)
+ l = 0;
+ if ((l & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY) {
+ sz = pci_size(l, sz, PCI_BASE_ADDRESS_MEM_MASK);
+ if (!sz)
+ continue;
+ res->start = l & PCI_BASE_ADDRESS_MEM_MASK;
+ res->flags |= l & ~PCI_BASE_ADDRESS_MEM_MASK;
+ } else {
+ sz = pci_size(l, sz, PCI_BASE_ADDRESS_IO_MASK & 0xffff);
+ if (!sz)
+ continue;
+ res->start = l & PCI_BASE_ADDRESS_IO_MASK;
+ res->flags |= l & ~PCI_BASE_ADDRESS_IO_MASK;
+ }
+ res->end = res->start + (unsigned long) sz;
+ res->flags |= pci_calc_resource_flags(l);
+ if ((l & (PCI_BASE_ADDRESS_SPACE | PCI_BASE_ADDRESS_MEM_TYPE_MASK))
+ == (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64)) {
+ pci_read_config_dword(dev, reg+4, &l);
+ next++;
+#if BITS_PER_LONG == 64
+ res->start |= ((unsigned long) l) << 32;
+ res->end = res->start + sz;
+ pci_write_config_dword(dev, reg+4, ~0);
+ pci_read_config_dword(dev, reg+4, &sz);
+ pci_write_config_dword(dev, reg+4, l);
+ if (~sz)
+ res->end = res->start + 0xffffffff +
+ (((unsigned long) ~sz) << 32);
+#else
+ if (l) {
+ printk(KERN_ERR "PCI: Unable to handle 64-bit address for device %s\n", dev->slot_name);
+ res->start = 0;
+ res->flags = 0;
+ continue;
+ }
+#endif
+ }
+ }
+ if (rom) {
+ dev->rom_base_reg = rom;
+ res = &dev->resource[PCI_ROM_RESOURCE];
+ res->name = dev->name;
+ pci_read_config_dword(dev, rom, &l);
+ pci_write_config_dword(dev, rom, ~PCI_ROM_ADDRESS_ENABLE);
+ pci_read_config_dword(dev, rom, &sz);
+ pci_write_config_dword(dev, rom, l);
+ if (l == 0xffffffff)
+ l = 0;
+ if (sz && sz != 0xffffffff) {
+ sz = pci_size(l, sz, PCI_ROM_ADDRESS_MASK);
+ if (!sz)
+ return;
+ res->flags = (l & PCI_ROM_ADDRESS_ENABLE) |
+ IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+ res->start = l & PCI_ROM_ADDRESS_MASK;
+ res->end = res->start + (unsigned long) sz;
+ }
+ }
+}
+
+void __devinit pci_read_bridge_bases(struct pci_bus *child)
+{
+ struct pci_dev *dev = child->self;
+ u8 io_base_lo, io_limit_lo;
+ u16 mem_base_lo, mem_limit_lo;
+ unsigned long base, limit;
+ struct resource *res;
+ int i;
+
+ if (!dev) /* It's a host bus, nothing to read */
+ return;
+
+ if (dev->transparent) {
+ printk("Transparent bridge - %s\n", dev->name);
+ for(i = 0; i < 4; i++)
+ child->resource[i] = child->parent->resource[i];
+ return;
+ }
+
+ for(i=0; i<3; i++)
+ child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
+
+ res = child->resource[0];
+ pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
+ pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
+ base = (io_base_lo & PCI_IO_RANGE_MASK) << 8;
+ limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8;
+
+ if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
+ u16 io_base_hi, io_limit_hi;
+ pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
+ pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
+ base |= (io_base_hi << 16);
+ limit |= (io_limit_hi << 16);
+ }
+
+ if (base && base <= limit) {
+ res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
+ res->start = base;
+ res->end = limit + 0xfff;
+ }
+
+ res = child->resource[1];
+ pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
+ pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
+ base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
+ limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
+ if (base && base <= limit) {
+ res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
+ res->start = base;
+ res->end = limit + 0xfffff;
+ }
+
+ res = child->resource[2];
+ pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
+ pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
+ base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
+ limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;
+
+ if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
+ u32 mem_base_hi, mem_limit_hi;
+ pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
+ pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);
+#if BITS_PER_LONG == 64
+ base |= ((long) mem_base_hi) << 32;
+ limit |= ((long) mem_limit_hi) << 32;
+#else
+ if (mem_base_hi || mem_limit_hi) {
+ printk(KERN_ERR "PCI: Unable to handle 64-bit address space for %s\n", child->name);
+ return;
+ }
+#endif
+ }
+ if (base && base <= limit) {
+ res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
+ res->start = base;
+ res->end = limit + 0xfffff;
+ }
+}
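+
+/* Worked example (annotation, not in the original file): an I/O
+ * window whose base and limit registers both read 0x40 decodes as
+ * base = (0x40 & 0xf0) << 8 = 0x4000 and end = 0x4000 + 0xfff =
+ * 0x4fff, i.e. the bridge forwards the 4K-aligned I/O range
+ * 0x4000-0x4fff.
+ */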
+
+static struct pci_bus * __devinit pci_alloc_bus(void)
+{
+ struct pci_bus *b;
+
+ b = kmalloc(sizeof(*b), GFP_KERNEL);
+ if (b) {
+ memset(b, 0, sizeof(*b));
+ INIT_LIST_HEAD(&b->children);
+ INIT_LIST_HEAD(&b->devices);
+ }
+ return b;
+}
+
+struct pci_bus * __devinit pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr)
+{
+ struct pci_bus *child;
+ int i;
+
+ /*
+ * Allocate a new bus, and inherit stuff from the parent.
+ */
+ child = pci_alloc_bus();
+
+ list_add_tail(&child->node, &parent->children);
+ child->self = dev;
+ dev->subordinate = child;
+ child->parent = parent;
+ child->ops = parent->ops;
+ child->sysdata = parent->sysdata;
+
+ /*
+ * Set up the primary, secondary and subordinate
+ * bus numbers.
+ */
+ child->number = child->secondary = busnr;
+ child->primary = parent->secondary;
+ child->subordinate = 0xff;
+
+ /* Set up default resource pointers and names.. */
+ for (i = 0; i < 4; i++) {
+ child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
+ child->resource[i]->name = child->name;
+ }
+
+ return child;
+}
+
+unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus);
+
+/*
+ * If it's a bridge, configure it and scan the bus behind it.
+ * For CardBus bridges, we don't scan behind as the devices will
+ * be handled by the bridge driver itself.
+ *
+ * We need to process bridges in two passes -- first we scan those
+ * already configured by the BIOS and after we are done with all of
+ * them, we proceed to assigning numbers to the remaining buses in
+ * order to avoid overlaps between old and new bus numbers.
+ */
+static int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass)
+{
+ unsigned int buses;
+ unsigned short cr;
+ struct pci_bus *child;
+ int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS);
+
+ pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
+ DBG("Scanning behind PCI bridge %s, config %06x, pass %d\n", dev->slot_name, buses & 0xffffff, pass);
+ if ((buses & 0xffff00) && !pcibios_assign_all_busses()) {
+ /*
+ * Bus already configured by firmware, process it in the first
+ * pass and just note the configuration.
+ */
+ if (pass)
+ return max;
+ child = pci_add_new_bus(bus, dev, 0);
+ child->primary = buses & 0xFF;
+ child->secondary = (buses >> 8) & 0xFF;
+ child->subordinate = (buses >> 16) & 0xFF;
+ child->number = child->secondary;
+ if (!is_cardbus) {
+ unsigned int cmax = pci_do_scan_bus(child);
+ if (cmax > max) max = cmax;
+ } else {
+ unsigned int cmax = child->subordinate;
+ if (cmax > max) max = cmax;
+ }
+ } else {
+ /*
+ * We need to assign a number to this bus which we always
+ * do in the second pass. We also keep all address decoders
+ * on the bridge disabled during scanning. FIXME: Why?
+ */
+ if (!pass)
+ return max;
+ pci_read_config_word(dev, PCI_COMMAND, &cr);
+ pci_write_config_word(dev, PCI_COMMAND, 0x0000);
+ pci_write_config_word(dev, PCI_STATUS, 0xffff);
+
+ child = pci_add_new_bus(bus, dev, ++max);
+ buses = (buses & 0xff000000)
+ | ((unsigned int)(child->primary) << 0)
+ | ((unsigned int)(child->secondary) << 8)
+ | ((unsigned int)(child->subordinate) << 16);
+ /*
+ * We need to blast all three values with a single write.
+ */
+ pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);
+ if (!is_cardbus) {
+ /* Now we can scan all subordinate buses... */
+ max = pci_do_scan_bus(child);
+ } else {
+ /*
+ * For CardBus bridges, we leave 4 bus numbers
+ * as cards with a PCI-to-PCI bridge can be
+ * inserted later.
+ */
+ max += 3;
+ }
+ /*
+ * Set the subordinate bus number to its real value.
+ */
+ child->subordinate = max;
+ pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
+ pci_write_config_word(dev, PCI_COMMAND, cr);
+ }
+ sprintf(child->name, (is_cardbus ? "PCI CardBus #%02x" : "PCI Bus #%02x"), child->number);
+ return max;
+}
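+
+/* Worked example (annotation, not in the original file): a freshly
+ * assigned bridge with primary bus 0, secondary bus 2 and provisional
+ * subordinate 0xff packs into the PCI_PRIMARY_BUS dword as
+ * 0x00ff0200 -- primary in bits 0-7, secondary in bits 8-15,
+ * subordinate in bits 16-23, top byte preserved -- written in a
+ * single dword access as the comment above requires.
+ */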
+
+/*
+ * Read interrupt line and base address registers.
+ * The architecture-dependent code can tweak these, of course.
+ */
+static void pci_read_irq(struct pci_dev *dev)
+{
+ unsigned char irq;
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq);
+ if (irq)
+ pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+ dev->irq = irq;
+}
+
+/**
+ * pci_setup_device - fill in class and map information of a device
+ * @dev: the device structure to fill
+ *
+ * Initialize the device structure with information about the device's
+ * vendor, class, memory and I/O-space addresses, IRQ lines, etc.
+ * Called at initialisation of the PCI subsystem and by CardBus services.
+ * Returns 0 on success and -1 if the device type is unknown (not normal,
+ * bridge or CardBus).
+ */
+int pci_setup_device(struct pci_dev * dev)
+{
+ u32 class;
+
+ sprintf(dev->slot_name, "%02x:%02x.%d", dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ sprintf(dev->name, "PCI device %04x:%04x", dev->vendor, dev->device);
+
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+ class >>= 8; /* upper 3 bytes */
+ dev->class = class;
+ class >>= 8;
+
+ DBG("Found %02x:%02x [%04x/%04x] %06x %02x\n", dev->bus->number, dev->devfn, dev->vendor, dev->device, class, dev->hdr_type);
+
+ /* "Unknown power state" */
+ dev->current_state = 4;
+
+ switch (dev->hdr_type) { /* header type */
+ case PCI_HEADER_TYPE_NORMAL: /* standard header */
+ if (class == PCI_CLASS_BRIDGE_PCI)
+ goto bad;
+ pci_read_irq(dev);
+ pci_read_bases(dev, 6, PCI_ROM_ADDRESS);
+ pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
+ pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device);
+ break;
+
+ case PCI_HEADER_TYPE_BRIDGE: /* bridge header */
+ if (class != PCI_CLASS_BRIDGE_PCI)
+ goto bad;
+ /* The PCI-to-PCI bridge spec requires that subtractive
+ decoding (i.e. transparent) bridge must have programming
+ interface code of 0x01. */
+ dev->transparent = ((dev->class & 0xff) == 1);
+ pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
+ break;
+
+ case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */
+ if (class != PCI_CLASS_BRIDGE_CARDBUS)
+ goto bad;
+ pci_read_irq(dev);
+ pci_read_bases(dev, 1, 0);
+ pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
+ pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
+ break;
+
+ default: /* unknown header */
+ printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n",
+ dev->slot_name, dev->hdr_type);
+ return -1;
+
+ bad:
+ printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n",
+ dev->slot_name, class, dev->hdr_type);
+ dev->class = PCI_CLASS_NOT_DEFINED;
+ }
+
+ /* We found a fine healthy device, go go go... */
+ return 0;
+}
+
+/*
+ * Read the config data for a PCI device, sanity-check it
+ * and fill in the dev structure...
+ */
+struct pci_dev * __devinit pci_scan_device(struct pci_dev *temp)
+{
+ struct pci_dev *dev;
+ u32 l;
+
+ if (pci_read_config_dword(temp, PCI_VENDOR_ID, &l))
+ return NULL;
+
+ /* some broken boards return 0 or ~0 if a slot is empty: */
+ if (l == 0xffffffff || l == 0x00000000 || l == 0x0000ffff || l == 0xffff0000)
+ return NULL;
+
+ dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+
+ memcpy(dev, temp, sizeof(*dev));
+ dev->vendor = l & 0xffff;
+ dev->device = (l >> 16) & 0xffff;
+
+ /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
+ set this higher, assuming the system even supports it. */
+ dev->dma_mask = 0xffffffff;
+ if (pci_setup_device(dev) < 0) {
+ kfree(dev);
+ dev = NULL;
+ }
+ return dev;
+}
+
+struct pci_dev * __devinit pci_scan_slot(struct pci_dev *temp)
+{
+ struct pci_bus *bus = temp->bus;
+ struct pci_dev *dev;
+ struct pci_dev *first_dev = NULL;
+ int func = 0;
+ int is_multi = 0;
+ u8 hdr_type;
+
+ for (func = 0; func < 8; func++, temp->devfn++) {
+ if (func && !is_multi) /* not a multi-function device */
+ continue;
+ if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type))
+ continue;
+ temp->hdr_type = hdr_type & 0x7f;
+
+ dev = pci_scan_device(temp);
+ if (!dev)
+ continue;
+ pci_name_device(dev);
+ if (!func) {
+ is_multi = hdr_type & 0x80;
+ first_dev = dev;
+ }
+
+ /*
+ * Link the device to both the global PCI device chain and
+ * the per-bus list of devices.
+ */
+ list_add_tail(&dev->global_list, &pci_devices);
+ list_add_tail(&dev->bus_list, &bus->devices);
+
+ /* Fix up broken headers */
+ pci_fixup_device(PCI_FIXUP_HEADER, dev);
+ }
+ return first_dev;
+}
+
+unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus)
+{
+ unsigned int devfn, max, pass;
+ struct list_head *ln;
+ struct pci_dev *dev, dev0;
+
+ DBG("Scanning bus %02x\n", bus->number);
+ max = bus->secondary;
+
+ /* Create a device template */
+ memset(&dev0, 0, sizeof(dev0));
+ dev0.bus = bus;
+ dev0.sysdata = bus->sysdata;
+
+ /* Go find them, Rover! */
+ for (devfn = 0; devfn < 0x100; devfn += 8) {
+ dev0.devfn = devfn;
+ pci_scan_slot(&dev0);
+ }
+
+ /*
+ * After performing arch-dependent fixup of the bus, look behind
+ * all PCI-to-PCI bridges on this bus.
+ */
+ DBG("Fixups for bus %02x\n", bus->number);
+ pcibios_fixup_bus(bus);
+ for (pass=0; pass < 2; pass++)
+ for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+ dev = pci_dev_b(ln);
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ max = pci_scan_bridge(bus, dev, max, pass);
+ }
+
+	/*
+	 * The bus is now fully scanned, so we know about every device on
+	 * it and about everything behind any bridges it carries.
+	 *
+	 * Return the highest sub-bus number we reached.
+	 */
+ DBG("Bus scan for %02x returning with max=%02x\n", bus->number, max);
+ return max;
+}
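+
+/*
+ * The two bridge passes above mirror pci_scan_bridge()'s contract
+ * (defined elsewhere in this file): broadly, bridges the firmware has
+ * already configured are walked on the first pass, and any remaining
+ * ones are assigned bus numbers on the second.
+ */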
+
+int __devinit pci_bus_exists(const struct list_head *list, int nr)
+{
+ const struct list_head *l;
+
+ for(l=list->next; l != list; l = l->next) {
+ const struct pci_bus *b = pci_bus_b(l);
+ if (b->number == nr || pci_bus_exists(&b->children, nr))
+ return 1;
+ }
+ return 0;
+}
+
+struct pci_bus * __devinit pci_alloc_primary_bus(int bus)
+{
+ struct pci_bus *b;
+
+ if (pci_bus_exists(&pci_root_buses, bus)) {
+ /* If we already got to this bus through a different bridge, ignore it */
+ DBG("PCI: Bus %02x already known\n", bus);
+ return NULL;
+ }
+
+ b = pci_alloc_bus();
+ list_add_tail(&b->node, &pci_root_buses);
+
+ b->number = b->secondary = bus;
+ b->resource[0] = &ioport_resource;
+ b->resource[1] = &iomem_resource;
+ return b;
+}
+
+struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata)
+{
+ struct pci_bus *b = pci_alloc_primary_bus(bus);
+ if (b) {
+ b->sysdata = sysdata;
+ b->ops = ops;
+ b->subordinate = pci_do_scan_bus(b);
+ }
+ return b;
+}
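+
+/*
+ * A minimal usage sketch (not part of this patch): an architecture's
+ * pcibios layer would typically probe the root bus with something like
+ * the following, where `pci_root_ops' stands in for that architecture's
+ * config-space access methods:
+ *
+ *	struct pci_bus *root = pci_scan_bus(0, &pci_root_ops, NULL);
+ *	if (root != NULL)
+ *		printk(KERN_INFO "PCI: root bus spans %02x..%02x\n",
+ *		       root->number, root->subordinate);
+ */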
+
+#ifdef CONFIG_PM
+
+/*
+ * PCI power management.
+ *
+ * This needs to be done centrally, so that we power-manage PCI
+ * devices in the right order: we must not shut down a PCI bridge
+ * before we've shut down the devices behind it, and we must not
+ * wake up a device before we've woken up the bridge leading to it.
+ *
+ * We do not touch devices whose driver doesn't export suspend/resume
+ * functions; that is just too dangerous. If the default PCI
+ * suspend/resume behaviour works for a device, its driver can trivially
+ * provide it (i.e. a suspend function that just calls
+ * pci_set_power_state()).
+ */
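+
+/*
+ * Concretely, the walkers below encode that ordering: on suspend,
+ * pci_pm_suspend_bus() recurses into child buses before touching this
+ * bus's own device list, so devices behind a bridge go down before the
+ * bridge itself; on resume, pci_pm_resume_bus() does the reverse and
+ * wakes this bus's devices (bridges included) before descending.
+ */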
+
+static int pci_pm_save_state_device(struct pci_dev *dev, u32 state)
+{
+ int error = 0;
+ if (dev) {
+ struct pci_driver *driver = dev->driver;
+ if (driver && driver->save_state)
+ error = driver->save_state(dev,state);
+ }
+ return error;
+}
+
+static int pci_pm_suspend_device(struct pci_dev *dev, u32 state)
+{
+ int error = 0;
+ if (dev) {
+ struct pci_driver *driver = dev->driver;
+ if (driver && driver->suspend)
+ error = driver->suspend(dev,state);
+ }
+ return error;
+}
+
+static int pci_pm_resume_device(struct pci_dev *dev)
+{
+ int error = 0;
+ if (dev) {
+ struct pci_driver *driver = dev->driver;
+ if (driver && driver->resume)
+ error = driver->resume(dev);
+ }
+ return error;
+}
+
+static int pci_pm_save_state_bus(struct pci_bus *bus, u32 state)
+{
+ struct list_head *list;
+ int error = 0;
+
+ list_for_each(list, &bus->children) {
+ error = pci_pm_save_state_bus(pci_bus_b(list),state);
+ if (error) return error;
+ }
+ list_for_each(list, &bus->devices) {
+ error = pci_pm_save_state_device(pci_dev_b(list),state);
+ if (error) return error;
+ }
+ return 0;
+}
+
+static int pci_pm_suspend_bus(struct pci_bus *bus, u32 state)
+{
+ struct list_head *list;
+
+ /* Walk the bus children list */
+ list_for_each(list, &bus->children)
+ pci_pm_suspend_bus(pci_bus_b(list),state);
+
+ /* Walk the device children list */
+ list_for_each(list, &bus->devices)
+ pci_pm_suspend_device(pci_dev_b(list),state);
+ return 0;
+}
+
+static int pci_pm_resume_bus(struct pci_bus *bus)
+{
+ struct list_head *list;
+
+ /* Walk the device children list */
+ list_for_each(list, &bus->devices)
+ pci_pm_resume_device(pci_dev_b(list));
+
+ /* And then walk the bus children */
+ list_for_each(list, &bus->children)
+ pci_pm_resume_bus(pci_bus_b(list));
+ return 0;
+}
+
+static int pci_pm_save_state(u32 state)
+{
+ struct list_head *list;
+ struct pci_bus *bus;
+ int error = 0;
+
+ list_for_each(list, &pci_root_buses) {
+ bus = pci_bus_b(list);
+ error = pci_pm_save_state_bus(bus,state);
+ if (!error)
+ error = pci_pm_save_state_device(bus->self,state);
+ }
+ return error;
+}
+
+static int pci_pm_suspend(u32 state)
+{
+ struct list_head *list;
+ struct pci_bus *bus;
+
+ list_for_each(list, &pci_root_buses) {
+ bus = pci_bus_b(list);
+ pci_pm_suspend_bus(bus,state);
+ pci_pm_suspend_device(bus->self,state);
+ }
+ return 0;
+}
+
+int pci_pm_resume(void)
+{
+ struct list_head *list;
+ struct pci_bus *bus;
+
+ list_for_each(list, &pci_root_buses) {
+ bus = pci_bus_b(list);
+ pci_pm_resume_device(bus->self);
+ pci_pm_resume_bus(bus);
+ }
+ return 0;
+}
+
+static int
+pci_pm_callback(struct pm_dev *pm_device, pm_request_t rqst, void *data)
+{
+ int error = 0;
+
+ switch (rqst) {
+ case PM_SAVE_STATE:
+ error = pci_pm_save_state((unsigned long)data);
+ break;
+ case PM_SUSPEND:
+ error = pci_pm_suspend((unsigned long)data);
+ break;
+ case PM_RESUME:
+ error = pci_pm_resume();
+ break;
+ default: break;
+ }
+ return error;
+}
+
+#endif
+
+
+#if 0 /* XXX KAF: Only USB uses this stuff -- I think we'll just bin it. */
+
+/*
+ * Pool allocator ... wraps the pci_alloc_consistent page allocator so
+ * that small blocks can easily be used by drivers for bus-mastering
+ * controllers. This should probably share the guts of the slab allocator.
+ */
+
+struct pci_pool { /* the pool */
+ struct list_head page_list;
+ spinlock_t lock;
+ size_t blocks_per_page;
+ size_t size;
+ int flags;
+ struct pci_dev *dev;
+ size_t allocation;
+ char name [32];
+ wait_queue_head_t waitq;
+};
+
+struct pci_page { /* cacheable header for 'allocation' bytes */
+ struct list_head page_list;
+ void *vaddr;
+ dma_addr_t dma;
+ unsigned long bitmap [0];
+};
+
+#define POOL_TIMEOUT_JIFFIES ((100 /* msec */ * HZ) / 1000)
+#define POOL_POISON_BYTE 0xa7
+
+// #define CONFIG_PCIPOOL_DEBUG
+
+
+/**
+ * pci_pool_create - Creates a pool of PCI-consistent memory blocks for DMA.
+ * @name: name of pool, for diagnostics
+ * @pdev: pci device that will be doing the DMA
+ * @size: size of the blocks in this pool.
+ * @align: alignment requirement for blocks; must be a power of two
+ * @allocation: returned blocks won't cross this boundary (or zero)
+ * @flags: SLAB_* flags (not all are supported).
+ *
+ * Returns a pci allocation pool with the requested characteristics, or
+ * null if one can't be created. Given one of these pools, pci_pool_alloc()
+ * may be used to allocate memory. Such memory will all have "consistent"
+ * DMA mappings, accessible by the device and its driver without using
+ * cache flushing primitives. The actual size of blocks allocated may be
+ * larger than requested because of alignment.
+ *
+ * If allocation is nonzero, objects returned from pci_pool_alloc() won't
+ * cross that size boundary. This is useful for devices which have
+ * addressing restrictions on individual DMA transfers, such as being
+ * unable to cross 4KByte boundaries.
+ */
+struct pci_pool *
+pci_pool_create (const char *name, struct pci_dev *pdev,
+ size_t size, size_t align, size_t allocation, int flags)
+{
+ struct pci_pool *retval;
+
+ if (align == 0)
+ align = 1;
+ if (size == 0)
+ return 0;
+ else if (size < align)
+ size = align;
+	else if ((size % align) != 0) {
+		/* round size up to the next multiple of align */
+		size += align - 1;
+		size &= ~(align - 1);
+	}
+
+ if (allocation == 0) {
+ if (PAGE_SIZE < size)
+ allocation = size;
+ else
+ allocation = PAGE_SIZE;
+ // FIXME: round up for less fragmentation
+ } else if (allocation < size)
+ return 0;
+
+ if (!(retval = kmalloc (sizeof *retval, flags)))
+ return retval;
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+ flags |= SLAB_POISON;
+#endif
+
+ strncpy (retval->name, name, sizeof retval->name);
+ retval->name [sizeof retval->name - 1] = 0;
+
+ retval->dev = pdev;
+ INIT_LIST_HEAD (&retval->page_list);
+ spin_lock_init (&retval->lock);
+ retval->size = size;
+ retval->flags = flags;
+ retval->allocation = allocation;
+ retval->blocks_per_page = allocation / size;
+ init_waitqueue_head (&retval->waitq);
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+ printk (KERN_DEBUG "pcipool create %s/%s size %d, %d/page (%d alloc)\n",
+ pdev ? pdev->slot_name : NULL, retval->name, size,
+ retval->blocks_per_page, allocation);
+#endif
+
+ return retval;
+}
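+
+/*
+ * A minimal usage sketch (not part of this patch; `pdev' is assumed to
+ * be the driver's struct pci_dev): create a pool of 64-byte blocks
+ * aligned to 16 bytes, take one block, and give it back.
+ *
+ *	struct pci_pool *pool;
+ *	dma_addr_t dma;
+ *	void *buf;
+ *
+ *	pool = pci_pool_create("demo", pdev, 64, 16, 0, SLAB_KERNEL);
+ *	if (pool && (buf = pci_pool_alloc(pool, SLAB_KERNEL, &dma)) != NULL)
+ *		pci_pool_free(pool, buf, dma);
+ *	if (pool)
+ *		pci_pool_destroy(pool);
+ */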
+
+
+static struct pci_page *
+pool_alloc_page (struct pci_pool *pool, int mem_flags)
+{
+ struct pci_page *page;
+ int mapsize;
+
+ mapsize = pool->blocks_per_page;
+ mapsize = (mapsize + BITS_PER_LONG - 1) / BITS_PER_LONG;
+ mapsize *= sizeof (long);
+
+ page = (struct pci_page *) kmalloc (mapsize + sizeof *page, mem_flags);
+ if (!page)
+ return 0;
+ page->vaddr = pci_alloc_consistent (pool->dev,
+ pool->allocation,
+ &page->dma);
+ if (page->vaddr) {
+ memset (page->bitmap, 0xff, mapsize); // bit set == free
+ if (pool->flags & SLAB_POISON)
+ memset (page->vaddr, POOL_POISON_BYTE, pool->allocation);
+ list_add (&page->page_list, &pool->page_list);
+ } else {
+ kfree (page);
+ page = 0;
+ }
+ return page;
+}
+
+
+static inline int
+is_page_busy (int blocks, unsigned long *bitmap)
+{
+ while (blocks > 0) {
+ if (*bitmap++ != ~0UL)
+ return 1;
+ blocks -= BITS_PER_LONG;
+ }
+ return 0;
+}
+
+static void
+pool_free_page (struct pci_pool *pool, struct pci_page *page)
+{
+ dma_addr_t dma = page->dma;
+
+ if (pool->flags & SLAB_POISON)
+ memset (page->vaddr, POOL_POISON_BYTE, pool->allocation);
+ pci_free_consistent (pool->dev, pool->allocation, page->vaddr, dma);
+ list_del (&page->page_list);
+ kfree (page);
+}
+
+
+/**
+ * pci_pool_destroy - destroys a pool of pci memory blocks.
+ * @pool: pci pool that will be destroyed
+ *
+ * Caller guarantees that no more memory from the pool is in use,
+ * and that nothing will try to use the pool after this call.
+ */
+void
+pci_pool_destroy (struct pci_pool *pool)
+{
+ unsigned long flags;
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+ printk (KERN_DEBUG "pcipool destroy %s/%s\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name);
+#endif
+
+ spin_lock_irqsave (&pool->lock, flags);
+ while (!list_empty (&pool->page_list)) {
+ struct pci_page *page;
+ page = list_entry (pool->page_list.next,
+ struct pci_page, page_list);
+ if (is_page_busy (pool->blocks_per_page, page->bitmap)) {
+ printk (KERN_ERR "pci_pool_destroy %s/%s, %p busy\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name, page->vaddr);
+ /* leak the still-in-use consistent memory */
+ list_del (&page->page_list);
+ kfree (page);
+ } else
+ pool_free_page (pool, page);
+ }
+ spin_unlock_irqrestore (&pool->lock, flags);
+ kfree (pool);
+}
+
+
+/**
+ * pci_pool_alloc - get a block of consistent memory
+ * @pool: pci pool that will produce the block
+ * @mem_flags: SLAB_KERNEL or SLAB_ATOMIC
+ * @handle: pointer to dma address of block
+ *
+ * This returns the kernel virtual address of a currently unused block,
+ * and reports its dma address through the handle.
+ * If such a memory block can't be allocated, null is returned.
+ */
+void *
+pci_pool_alloc (struct pci_pool *pool, int mem_flags, dma_addr_t *handle)
+{
+ unsigned long flags;
+ struct list_head *entry;
+ struct pci_page *page;
+ int map, block;
+ size_t offset;
+ void *retval;
+
+restart:
+ spin_lock_irqsave (&pool->lock, flags);
+ list_for_each (entry, &pool->page_list) {
+ int i;
+ page = list_entry (entry, struct pci_page, page_list);
+		/* only cacheable accesses here ... */
+ for (map = 0, i = 0;
+ i < pool->blocks_per_page;
+ i += BITS_PER_LONG, map++) {
+ if (page->bitmap [map] == 0)
+ continue;
+ block = ffz (~ page->bitmap [map]);
+ if ((i + block) < pool->blocks_per_page) {
+ clear_bit (block, &page->bitmap [map]);
+ offset = (BITS_PER_LONG * map) + block;
+ offset *= pool->size;
+ goto ready;
+ }
+ }
+ }
+ if (!(page = pool_alloc_page (pool, mem_flags))) {
+ if (mem_flags == SLAB_KERNEL) {
+ DECLARE_WAITQUEUE (wait, current);
+
+ current->state = TASK_INTERRUPTIBLE;
+ add_wait_queue (&pool->waitq, &wait);
+ spin_unlock_irqrestore (&pool->lock, flags);
+
+ schedule_timeout (POOL_TIMEOUT_JIFFIES);
+
+ current->state = TASK_RUNNING;
+ remove_wait_queue (&pool->waitq, &wait);
+ goto restart;
+ }
+ retval = 0;
+ goto done;
+ }
+
+ clear_bit (0, &page->bitmap [0]);
+ offset = 0;
+ready:
+ retval = offset + page->vaddr;
+ *handle = offset + page->dma;
+done:
+ spin_unlock_irqrestore (&pool->lock, flags);
+ return retval;
+}
+
+
+static struct pci_page *
+pool_find_page (struct pci_pool *pool, dma_addr_t dma)
+{
+ unsigned long flags;
+ struct list_head *entry;
+ struct pci_page *page;
+
+ spin_lock_irqsave (&pool->lock, flags);
+ list_for_each (entry, &pool->page_list) {
+ page = list_entry (entry, struct pci_page, page_list);
+ if (dma < page->dma)
+ continue;
+ if (dma < (page->dma + pool->allocation))
+ goto done;
+ }
+ page = 0;
+done:
+ spin_unlock_irqrestore (&pool->lock, flags);
+ return page;
+}
+
+
+/**
+ * pci_pool_free - put block back into pci pool
+ * @pool: the pci pool holding the block
+ * @vaddr: virtual address of block
+ * @dma: dma address of block
+ *
+ * Caller promises neither device nor driver will again touch this block
+ * unless it is first re-allocated.
+ */
+void
+pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t dma)
+{
+ struct pci_page *page;
+ unsigned long flags;
+ int map, block;
+
+ if ((page = pool_find_page (pool, dma)) == 0) {
+ printk (KERN_ERR "pci_pool_free %s/%s, %p/%x (bad dma)\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name, vaddr, (int) (dma & 0xffffffff));
+ return;
+ }
+#ifdef CONFIG_PCIPOOL_DEBUG
+ if (((dma - page->dma) + (void *)page->vaddr) != vaddr) {
+ printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%x\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name, vaddr, (int) (dma & 0xffffffff));
+ return;
+ }
+#endif
+
+ block = dma - page->dma;
+ block /= pool->size;
+ map = block / BITS_PER_LONG;
+ block %= BITS_PER_LONG;
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+ if (page->bitmap [map] & (1UL << block)) {
+ printk (KERN_ERR "pci_pool_free %s/%s, dma %x already free\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name, dma);
+ return;
+ }
+#endif
+ if (pool->flags & SLAB_POISON)
+ memset (vaddr, POOL_POISON_BYTE, pool->size);
+
+ spin_lock_irqsave (&pool->lock, flags);
+ set_bit (block, &page->bitmap [map]);
+ if (waitqueue_active (&pool->waitq))
+ wake_up (&pool->waitq);
+	/*
+	 * Resist the temptation to do
+	 *    if (!is_page_busy(bpp, page->bitmap)) pool_free_page(pool, page);
+	 * here: it is not interrupt-safe. Better to have empty pages hang
+	 * around.
+	 */
+ spin_unlock_irqrestore (&pool->lock, flags);
+}
+
+#endif /* XXX End of PCI pool allocator stuff. */
+
+
+void __devinit pci_init(void)
+{
+ struct pci_dev *dev;
+
+ pcibios_init();
+
+ pci_for_each_dev(dev) {
+ pci_fixup_device(PCI_FIXUP_FINAL, dev);
+ }
+
+#ifdef CONFIG_PM
+ pm_register(PM_PCI_DEV, 0, pci_pm_callback);
+#endif
+}
+
+static int __devinit pci_setup(char *str)
+{
+ while (str) {
+ char *k = strchr(str, ',');
+ if (k)
+ *k++ = 0;
+ if (*str && (str = pcibios_setup(str)) && *str) {
+ /* PCI layer options should be handled here */
+ printk(KERN_ERR "PCI: Unknown option `%s'\n", str);
+ }
+ str = k;
+ }
+ return 1;
+}
+
+__setup("pci=", pci_setup);
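+
+/*
+ * An illustrative boot-line use of the parser above: options after
+ * "pci=" are comma-separated and each is offered to pcibios_setup()
+ * in turn, e.g. (option names depend on the architecture's pcibios
+ * layer)
+ *
+ *	pci=conf1,nosort
+ *
+ * Anything pcibios_setup() does not consume triggers the "Unknown
+ * option" message above.
+ */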
+
+EXPORT_SYMBOL(pci_read_config_byte);
+EXPORT_SYMBOL(pci_read_config_word);
+EXPORT_SYMBOL(pci_read_config_dword);
+EXPORT_SYMBOL(pci_write_config_byte);
+EXPORT_SYMBOL(pci_write_config_word);
+EXPORT_SYMBOL(pci_write_config_dword);
+EXPORT_SYMBOL(pci_devices);
+EXPORT_SYMBOL(pci_root_buses);
+EXPORT_SYMBOL(pci_enable_device_bars);
+EXPORT_SYMBOL(pci_enable_device);
+EXPORT_SYMBOL(pci_disable_device);
+EXPORT_SYMBOL(pci_find_capability);
+EXPORT_SYMBOL(pci_release_regions);
+EXPORT_SYMBOL(pci_request_regions);
+EXPORT_SYMBOL(pci_release_region);
+EXPORT_SYMBOL(pci_request_region);
+EXPORT_SYMBOL(pci_find_class);
+EXPORT_SYMBOL(pci_find_device);
+EXPORT_SYMBOL(pci_find_slot);
+EXPORT_SYMBOL(pci_find_subsys);
+EXPORT_SYMBOL(pci_set_master);
+EXPORT_SYMBOL(pci_set_mwi);
+EXPORT_SYMBOL(pci_clear_mwi);
+EXPORT_SYMBOL(pdev_set_mwi);
+EXPORT_SYMBOL(pci_set_dma_mask);
+EXPORT_SYMBOL(pci_dac_set_dma_mask);
+EXPORT_SYMBOL(pci_assign_resource);
+EXPORT_SYMBOL(pci_register_driver);
+EXPORT_SYMBOL(pci_unregister_driver);
+EXPORT_SYMBOL(pci_dev_driver);
+EXPORT_SYMBOL(pci_match_device);
+EXPORT_SYMBOL(pci_find_parent_resource);
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pci_setup_device);
+EXPORT_SYMBOL(pci_insert_device);
+EXPORT_SYMBOL(pci_remove_device);
+EXPORT_SYMBOL(pci_announce_device_to_drivers);
+EXPORT_SYMBOL(pci_add_new_bus);
+EXPORT_SYMBOL(pci_do_scan_bus);
+EXPORT_SYMBOL(pci_scan_slot);
+EXPORT_SYMBOL(pci_scan_bus);
+EXPORT_SYMBOL(pci_scan_device);
+EXPORT_SYMBOL(pci_read_bridge_bases);
+#ifdef CONFIG_PROC_FS
+EXPORT_SYMBOL(pci_proc_attach_device);
+EXPORT_SYMBOL(pci_proc_detach_device);
+EXPORT_SYMBOL(pci_proc_attach_bus);
+EXPORT_SYMBOL(pci_proc_detach_bus);
+EXPORT_SYMBOL(proc_bus_pci_dir);
+#endif
+#endif
+
+EXPORT_SYMBOL(pci_set_power_state);
+EXPORT_SYMBOL(pci_save_state);
+EXPORT_SYMBOL(pci_restore_state);
+EXPORT_SYMBOL(pci_enable_wake);
+
+/* Obsolete functions */
+
+EXPORT_SYMBOL(pcibios_present);
+EXPORT_SYMBOL(pcibios_read_config_byte);
+EXPORT_SYMBOL(pcibios_read_config_word);
+EXPORT_SYMBOL(pcibios_read_config_dword);
+EXPORT_SYMBOL(pcibios_write_config_byte);
+EXPORT_SYMBOL(pcibios_write_config_word);
+EXPORT_SYMBOL(pcibios_write_config_dword);
+EXPORT_SYMBOL(pcibios_find_class);
+EXPORT_SYMBOL(pcibios_find_device);
+
+/* Quirk info */
+
+EXPORT_SYMBOL(isa_dma_bridge_buggy);
+EXPORT_SYMBOL(pci_pci_problems);
+
+#if 0
+/* Pool allocator */
+
+EXPORT_SYMBOL (pci_pool_create);
+EXPORT_SYMBOL (pci_pool_destroy);
+EXPORT_SYMBOL (pci_pool_alloc);
+EXPORT_SYMBOL (pci_pool_free);
+
+#endif
diff --git a/xen/drivers/pci/pci.ids b/xen/drivers/pci/pci.ids
new file mode 100644
index 0000000000..c4e4283cc5
--- /dev/null
+++ b/xen/drivers/pci/pci.ids
@@ -0,0 +1,6778 @@
+#
+# List of PCI IDs
+#
+# Maintained by Martin Mares <mj@ucw.cz> and other volunteers from the
+# Linux PCI ID's Project at http://pciids.sf.net/. New data are always
+# welcome (if they are accurate); we're eagerly expecting new entries,
+# so if you have anything to contribute, please visit the home page or
+# send a diff -u against the most recent pci.ids to pci-ids@ucw.cz.
+#
+# $Id: pci.ids,v 1.46 2002/08/14 17:38:51 mares Exp $
+#
+
+# Vendors, devices and subsystems. Please keep sorted.
+
+# Syntax:
+# vendor vendor_name
+# device device_name <-- single tab
+# subvendor subdevice subsystem_name <-- two tabs
+
+0000 Gammagraphx, Inc.
+001a Ascend Communications, Inc.
+0033 Paradyne corp.
+003d Lockheed Martin-Marietta Corp
+0070 Hauppauge computer works Inc.
+0100 Ncipher Corp Ltd
+0675 Dynalink
+ 1700 IS64PH ISDN Adapter
+ 1702 IS64PH ISDN Adapter
+# Wrong ID used in subsystem ID of VIA USB controllers.
+0925 VIA Technologies, Inc. (Wrong ID)
+09c1 Arris
+ 0704 CM 200E Cable Modem
+0a89 BREA Technologies Inc
+0e11 Compaq Computer Corporation
+ 0001 PCI to EISA Bridge
+ 0002 PCI to ISA Bridge
+ 0049 NC7132 Gigabit Upgrade Module
+ 004a NC6136 Gigabit Server Adapter
+ 0508 Netelligent 4/16 Token Ring
+ 1000 Triflex/Pentium Bridge, Model 1000
+ 2000 Triflex/Pentium Bridge, Model 2000
+ 3032 QVision 1280/p
+ 3033 QVision 1280/p
+ 3034 QVision 1280/p
+ 4000 4000 [Triflex]
+ 6010 HotPlug PCI Bridge 6010
+ 7020 USB Controller
+ a0ec Fibre Channel Host Controller
+ a0f0 Advanced System Management Controller
+ a0f3 Triflex PCI to ISA Bridge
+ a0f7 PCI Hotplug Controller
+ 8086 002a PCI Hotplug Controller A
+ 8086 002b PCI Hotplug Controller B
+ a0f8 ZFMicro Chipset USB
+ a0fc Fibre Channel Host Controller
+ ae10 Smart-2/P RAID Controller
+ 0e11 4030 Smart-2/P Array Controller
+ 0e11 4031 Smart-2SL Array Controller
+ 0e11 4032 Smart Array Controller
+ 0e11 4033 Smart 3100ES Array Controller
+ ae29 MIS-L
+ ae2a MPC
+ ae2b MIS-E
+ ae31 System Management Controller
+ ae32 Netelligent 10/100
+ ae33 Triflex Dual EIDE Controller
+ ae34 Netelligent 10
+ ae35 Integrated NetFlex-3/P
+ ae40 Netelligent 10/100 Dual
+ ae43 ProLiant Integrated Netelligent 10/100
+ ae69 CETUS-L
+ ae6c Northstar
+ ae6d NorthStar CPU to PCI Bridge
+ b011 Integrated Netelligent 10/100
+ b012 Netelligent 10 T/2
+ b01e NC3120 Fast Ethernet NIC
+ b01f NC3122 Fast Ethernet NIC
+ b02f NC1120 Ethernet NIC
+ b030 Netelligent WS 5100
+ b04a 10/100 TX PCI Intel WOL UTP Controller
+ b060 Smart Array 5300 Controller
+ b0c6 NC3161 Fast Ethernet NIC
+ b0c7 NC3160 Fast Ethernet NIC
+ b0d7 NC3121 Fast Ethernet NIC
+ b0dd NC3131 Fast Ethernet NIC
+ b0de NC3132 Fast Ethernet Module
+ b0df NC6132 Gigabit Module
+ b0e0 NC6133 Gigabit Module
+ b0e1 NC3133 Fast Ethernet Module
+ b123 NC6134 Gigabit NIC
+ b134 NC3163 Fast Ethernet NIC
+ b13c NC3162 Fast Ethernet NIC
+ b144 NC3123 Fast Ethernet NIC
+ b163 NC3134 Fast Ethernet NIC
+ b164 NC3165 Fast Ethernet Upgrade Module
+ b178 Smart Array 5i/532
+ b1a4 NC7131 Gigabit Server Adapter
+ f130 NetFlex-3/P ThunderLAN 1.0
+ f150 NetFlex-3/P ThunderLAN 2.3
+0e55 HaSoTec GmbH
+1000 LSI Logic / Symbios Logic (formerly NCR)
+ 0001 53c810
+ 1000 1000 8100S
+ 0002 53c820
+ 0003 53c825
+ 0004 53c815
+ 0005 53c810AP
+ 0006 53c860
+ 000a 53c1510
+ 000b 53c896
+ 000c 53c895
+ 1de1 3907 DC-390U2W
+ 000d 53c885
+ 000f 53c875
+ 0e11 7004 Embedded Ultra Wide SCSI Controller
+ 1092 8760 FirePort 40 Dual SCSI Controller
+ 1de1 3904 DC390F Ultra Wide SCSI Controller
+ 0010 53c895
+ 0e11 4040 Integrated Array Controller
+ 0e11 4048 Integrated Array Controller
+ 0012 53c895a
+ 0013 53c875a
+ 0020 53c1010 Ultra3 SCSI Adapter
+ 1de1 1020 DC-390U3W
+ 0021 53c1010 66MHz Ultra3 SCSI Adapter
+ 0030 53c1030
+ 1028 1010 LSI U320 SCSI Controller
+ 0040 53c1035
+ 008f 53c875J
+ 1092 8000 FirePort 40 SCSI Controller
+ 1092 8760 FirePort 40 Dual SCSI Host Adapter
+ 0621 FC909
+ 0622 FC929
+ 0623 FC929 LAN
+ 0624 FC919
+ 0625 FC919 LAN
+ 0626 FC929X
+ 0627 FC929X LAN
+ 0628 FC919X
+ 0629 FC919X LAN
+ 0701 83C885 NT50 DigitalScape Fast Ethernet
+ 0702 Yellowfin G-NIC gigabit ethernet
+ 1318 0000 PEI100X
+ 0901 61C102
+ 1000 63C815
+ 1960 PowerEdge Expandable RAID Controller 4
+ 1028 0518 PowerEdge Expandable RAID Controller 4/DC
+ 1028 0520 PowerEdge Expandable RAID Controller 4/SC
+ 1028 0531 PowerEdge Expandable RAID Controller 4/QC
+1001 Kolter Electronic
+ 0010 PCI 1616 Measurement card with 32 digital I/O lines
+ 0011 OPTO-PCI Opto-Isolated digital I/O board
+ 0012 PCI-AD/DA Analogue I/O board
+ 0013 PCI-OPTO-RELAIS Digital I/O board with relay outputs
+ 0014 PCI-Counter/Timer Counter Timer board
+ 0015 PCI-DAC416 Analogue output board
+ 0016 PCI-MFB Analogue I/O board
+ 0017 PROTO-3 PCI Prototyping board
+ 9100 INI-9100/9100W SCSI Host
+1002 ATI Technologies Inc
+ 4158 68800AX [Mach32]
+ 4242 Radeon 8500 DV
+ 1002 02aa Radeon 8500 AIW DV Edition
+ 4354 215CT [Mach64 CT]
+ 4358 210888CX [Mach64 CX]
+ 4554 210888ET [Mach64 ET]
+ 4654 Mach64 VT
+ 4742 3D Rage Pro AGP 1X/2X
+ 1002 0040 Rage Pro Turbo AGP 2X
+ 1002 0044 Rage Pro Turbo AGP 2X
+ 1002 0061 Rage Pro AIW AGP 2X
+ 1002 0062 Rage Pro AIW AGP 2X
+ 1002 0063 Rage Pro AIW AGP 2X
+ 1002 0080 Rage Pro Turbo AGP 2X
+ 1002 0084 Rage Pro Turbo AGP 2X
+ 1002 4742 Rage Pro Turbo AGP 2X
+ 1002 8001 Rage Pro Turbo AGP 2X
+ 1028 0082 Rage Pro Turbo AGP 2X
+ 1028 4082 Optiplex GX1 Onboard Display Adapter
+ 1028 8082 Rage Pro Turbo AGP 2X
+ 1028 c082 Rage Pro Turbo AGP 2X
+ 8086 4152 Xpert 98D AGP 2X
+ 8086 464a Rage Pro Turbo AGP 2X
+ 4744 3D Rage Pro AGP 1X
+ 1002 4744 Rage Pro Turbo AGP
+ 4747 3D Rage Pro
+ 4749 3D Rage Pro
+ 1002 0061 Rage Pro AIW
+ 1002 0062 Rage Pro AIW
+ 474c Rage XC
+ 474d Rage XL AGP 2X
+ 1002 0004 Xpert 98 RXL AGP 2X
+ 1002 0008 Xpert 98 RXL AGP 2X
+ 1002 0080 Rage XL AGP 2X
+ 1002 0084 Xpert 98 AGP 2X
+ 1002 474d Rage XL AGP
+ 1033 806a Rage XL AGP
+ 474e Rage XC AGP
+ 1002 474e Rage XC AGP
+ 474f Rage XL
+ 1002 0008 Rage XL
+ 1002 474f Rage XL
+ 4750 3D Rage Pro 215GP
+ 1002 0040 Rage Pro Turbo
+ 1002 0044 Rage Pro Turbo
+ 1002 0080 Rage Pro Turbo
+ 1002 0084 Rage Pro Turbo
+ 1002 4750 Rage Pro Turbo
+ 4751 3D Rage Pro 215GQ
+ 4752 Rage XL
+ 1002 0008 Rage XL
+ 1002 4752 Rage XL
+ 4753 Rage XC
+ 1002 4753 Rage XC
+ 4754 3D Rage I/II 215GT [Mach64 GT]
+ 4755 3D Rage II+ 215GTB [Mach64 GTB]
+ 4756 3D Rage IIC 215IIC [Mach64 GT IIC]
+ 1002 4756 Rage IIC
+ 4757 3D Rage IIC AGP
+ 1002 4757 Rage IIC AGP
+ 1028 0089 Rage 3D IIC
+ 1028 4082 Rage 3D IIC
+ 1028 8082 Rage 3D IIC
+ 1028 c082 Rage 3D IIC
+ 4758 210888GX [Mach64 GX]
+ 4759 3D Rage IIC
+ 475a 3D Rage IIC AGP
+ 1002 0087 Rage 3D IIC
+ 1002 475a Rage IIC AGP
+ 4c42 3D Rage LT Pro AGP-133
+ 0e11 b0e8 Rage 3D LT Pro
+ 0e11 b10e 3D Rage LT Pro (Compaq Armada 1750)
+ 1002 0040 Rage LT Pro AGP 2X
+ 1002 0044 Rage LT Pro AGP 2X
+ 1002 4c42 Rage LT Pro AGP 2X
+ 1002 8001 Rage LT Pro AGP 2X
+ 1028 0085 Rage 3D LT Pro
+ 4c44 3D Rage LT Pro AGP-66
+ 4c45 Rage Mobility M3 AGP
+ 4c46 Rage Mobility M3 AGP 2x
+ 4c47 3D Rage LT-G 215LG
+ 4c49 3D Rage LT Pro
+ 1002 0004 Rage LT Pro
+ 1002 0040 Rage LT Pro
+ 1002 0044 Rage LT Pro
+ 1002 4c49 Rage LT Pro
+ 4c4d Rage Mobility P/M AGP 2x
+ 1002 0084 Xpert 98 AGP 2X (Mobility)
+ 4c4e Rage Mobility L AGP 2x
+ 4c50 3D Rage LT Pro
+ 1002 4c50 Rage LT Pro
+ 4c51 3D Rage LT Pro
+ 4c52 Rage Mobility P/M
+ 4c53 Rage Mobility L
+ 4c54 264LT [Mach64 LT]
+ 4c57 Radeon Mobility M7 LW
+ 1028 00e6 Radeon Mobility M7 LW (Dell Inspiron 8100)
+ 4c58 Radeon Mobility M7 LX [Radeon Mobility FireGL 7800]
+ 4c59 Radeon Mobility M6 LY
+ 1014 0235 ThinkPad A30p (2653-64G)
+ 1014 0239 ThinkPad X22/X23/X24
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 4c5a Radeon Mobility M6 LZ
+ 4d46 Rage Mobility M4 AGP
+ 4d4c Rage Mobility M4 AGP
+ 5041 Rage 128 PA/PRO
+ 5042 Rage 128 PB/PRO AGP 2x
+ 5043 Rage 128 PC/PRO AGP 4x
+ 5044 Rage 128 PD/PRO TMDS
+ 1002 0028 Rage 128 AIW
+ 1002 0029 Rage 128 AIW
+ 5045 Rage 128 PE/PRO AGP 2x TMDS
+ 5046 Rage 128 PF/PRO AGP 4x TMDS
+ 1002 0004 Rage Fury Pro
+ 1002 0008 Rage Fury Pro/Xpert 2000 Pro
+ 1002 0014 Rage Fury Pro
+ 1002 0018 Rage Fury Pro/Xpert 2000 Pro
+ 1002 0028 Rage 128 Pro AIW AGP
+ 1002 002a Rage 128 Pro AIW AGP
+ 1002 0048 Rage Fury Pro
+ 1002 2000 Rage Fury MAXX AGP 4x (TMDS) (VGA device)
+ 1002 2001 Rage Fury MAXX AGP 4x (TMDS) (Extra device?!)
+ 5047 Rage 128 PG/PRO
+ 5048 Rage 128 PH/PRO AGP 2x
+ 5049 Rage 128 PI/PRO AGP 4x
+ 504a Rage 128 PJ/PRO TMDS
+ 504b Rage 128 PK/PRO AGP 2x TMDS
+ 504c Rage 128 PL/PRO AGP 4x TMDS
+ 504d Rage 128 PM/PRO
+ 504e Rage 128 PN/PRO AGP 2x
+ 504f Rage 128 PO/PRO AGP 4x
+ 5050 Rage 128 PP/PRO TMDS
+ 1002 0008 Xpert 128
+ 5051 Rage 128 PQ/PRO AGP 2x TMDS
+ 5052 Rage 128 PR/PRO AGP 4x TMDS
+ 5053 Rage 128 PS/PRO
+ 5054 Rage 128 PT/PRO AGP 2x
+ 5055 Rage 128 PU/PRO AGP 4x
+ 5056 Rage 128 PV/PRO TMDS
+ 5057 Rage 128 PW/PRO AGP 2x TMDS
+ 5058 Rage 128 PX/PRO AGP 4x TMDS
+ 5144 Radeon QD
+ 1002 0008 Radeon 7000/Radeon VE
+ 1002 0009 Radeon 7000/Radeon
+ 1002 000a Radeon 7000/Radeon
+ 1002 001a Radeon 7000/Radeon
+ 1002 0029 Radeon AIW
+ 1002 0038 Radeon 7000/Radeon
+ 1002 0039 Radeon 7000/Radeon
+ 1002 008a Radeon 7000/Radeon
+ 1002 00ba Radeon 7000/Radeon
+ 1002 0139 Radeon 7000/Radeon
+ 1002 028a Radeon 7000/Radeon
+ 1002 02aa Radeon AIW
+ 1002 053a Radeon 7000/Radeon
+ 5145 Radeon QE
+ 5146 Radeon QF
+ 5147 Radeon QG
+ 5148 Radeon R200 QH [Radeon 8500]
+ 1002 0152 FireGL 8800
+ 1002 0172 FireGL 8700
+ 5149 Radeon R200 QI
+ 514a Radeon R200 QJ
+ 514b Radeon R200 QK
+ 514c Radeon R200 QL [Radeon 8500 LE]
+ 1002 003a Radeon R200 QL [Radeon 8500 LE]
+ 1002 013a Radeon 8500
+ 5157 Radeon 7500 QW
+ 1002 013a Radeon 7500
+ 174b 7161 Radeon RV200 QW [Radeon 7500 LE]
+ 5158 Radeon 7500 QX
+ 5159 Radeon VE QY
+ 1002 000a Radeon 7000/Radeon VE
+ 1002 0038 Radeon 7000/Radeon VE
+ 1002 003a Radeon 7000/Radeon VE
+ 1002 00ba Radeon 7000/Radeon VE
+ 1002 013a Radeon 7000/Radeon VE
+ 174b 7112 Radeon 7000 64M TVO
+ 515a Radeon VE QZ
+ 5168 Radeon R200 Qh
+ 5169 Radeon R200 Qi
+ 516a Radeon R200 Qj
+ 516b Radeon R200 Qk
+ 5245 Rage 128 RE/SG
+ 1002 0008 Xpert 128
+ 1002 0028 Rage 128 AIW
+ 1002 0029 Rage 128 AIW
+ 1002 0068 Rage 128 AIW
+ 5246 Rage 128 RF/SG AGP
+ 1002 0004 Magnum/Xpert 128/Xpert 99
+ 1002 0008 Magnum/Xpert128/X99/Xpert2000
+ 1002 0028 Rage 128 AIW AGP
+ 1002 0044 Rage Fury/Xpert 128/Xpert 2000
+ 1002 0068 Rage 128 AIW AGP
+ 1002 0448 Rage Fury
+ 5247 Rage 128 RG
+ 524b Rage 128 RK/VR
+ 524c Rage 128 RL/VR AGP
+ 1002 0008 Xpert 99/Xpert 2000
+ 1002 0088 Xpert 99
+ 5345 Rage 128 SE/4x
+ 5346 Rage 128 SF/4x AGP 2x
+ 5347 Rage 128 SG/4x AGP 4x
+ 5348 Rage 128 SH
+ 534b Rage 128 SK/4x
+ 534c Rage 128 SL/4x AGP 2x
+ 534d Rage 128 SM/4x AGP 4x
+ 1002 0008 Xpert 99/Xpert 2000
+ 1002 0018 Xpert 2000
+ 534e Rage 128 4x
+ 5354 Mach 64 VT
+ 1002 5654 Mach 64 reference
+ 5446 Rage 128 Pro Ultra TF
+ 1002 0004 Rage Fury Pro
+ 1002 0008 Rage Fury Pro/Xpert 2000 Pro
+ 1002 0018 Rage Fury Pro/Xpert 2000 Pro
+ 1002 0028 Rage 128 AIW Pro AGP
+ 1002 0029 Rage 128 AIW
+ 1002 002a Rage 128 AIW Pro AGP
+ 1002 002b Rage 128 AIW
+ 1002 0048 Xpert 2000 Pro
+ 544c Rage 128 Pro Ultra TL
+ 5452 Rage 128 Pro Ultra TR
+ 1002 001c Rage 128 Pro 4XL
+ 103c 1279 Rage 128 Pro 4XL
+ 5453 Rage 128 Pro Ultra TS
+ 5454 Rage 128 Pro Ultra TT
+ 5455 Rage 128 Pro Ultra TU
+ 5654 264VT [Mach64 VT]
+ 1002 5654 Mach64VT Reference
+ 5655 264VT3 [Mach64 VT3]
+ 5656 264VT4 [Mach64 VT4]
+1003 ULSI Systems
+ 0201 US201
+1004 VLSI Technology Inc
+ 0005 82C592-FC1
+ 0006 82C593-FC1
+ 0007 82C594-AFC2
+ 0008 82C596/7 [Wildcat]
+ 0009 82C597-AFC2
+ 000c 82C541 [Lynx]
+ 000d 82C543 [Lynx]
+ 0101 82C532
+ 0102 82C534 [Eagle]
+ 0103 82C538
+ 0104 82C535
+ 0105 82C147
+ 0200 82C975
+ 0280 82C925
+ 0304 QSound ThunderBird PCI Audio
+ 1004 0304 QSound ThunderBird PCI Audio
+ 122d 1206 DSP368 Audio
+ 1483 5020 XWave Thunder 3D Audio
+ 0305 QSound ThunderBird PCI Audio Gameport
+ 1004 0305 QSound ThunderBird PCI Audio Gameport
+ 122d 1207 DSP368 Audio Gameport
+ 1483 5021 XWave Thunder 3D Audio Gameport
+ 0306 QSound ThunderBird PCI Audio Support Registers
+ 1004 0306 QSound ThunderBird PCI Audio Support Registers
+ 122d 1208 DSP368 Audio Support Registers
+ 1483 5022 XWave Thunder 3D Audio Support Registers
+ 0702 VAS96011 [Golden Gate II]
+1005 Avance Logic Inc. [ALI]
+ 2064 ALG2032/2064
+ 2128 ALG2364A
+ 2301 ALG2301
+ 2302 ALG2302
+ 2364 ALG2364
+ 2464 ALG2364A
+ 2501 ALG2564A/25128A
+1006 Reply Group
+1007 NetFrame Systems Inc
+1008 Epson
+100a Phoenix Technologies
+100b National Semiconductor Corporation
+ 0001 DP83810
+ 0002 87415/87560 IDE
+ 000e 87560 Legacy I/O
+ 000f FireWire Controller
+ 0011 NS87560 National PCI System I/O
+ 0012 USB Controller
+ 0020 DP83815 (MacPhyter) Ethernet Controller
+ 0022 DP83820 10/100/1000 Ethernet Controller
+ 0500 SCx200 Bridge
+ 0501 SCx200 SMI
+ 0502 SCx200 IDE
+ 0503 SCx200 Audio
+ 0504 SCx200 Video
+ 0505 SCx200 XBus
+ d001 87410 IDE
+100c Tseng Labs Inc
+ 3202 ET4000/W32p rev A
+ 3205 ET4000/W32p rev B
+ 3206 ET4000/W32p rev C
+ 3207 ET4000/W32p rev D
+ 3208 ET6000
+ 4702 ET6300
+100d AST Research Inc
+100e Weitek
+ 9000 P9000 Viper
+ 9001 P9000 Viper
+ 9002 P9000 Viper
+ 9100 P9100 Viper Pro/SE
+1010 Video Logic, Ltd.
+1011 Digital Equipment Corporation
+ 0001 DECchip 21050
+ 0002 DECchip 21040 [Tulip]
+ 0004 DECchip 21030 [TGA]
+ 0007 NVRAM [Zephyr NVRAM]
+ 0008 KZPSA [KZPSA]
+ 0009 DECchip 21140 [FasterNet]
+ 1025 0310 21140 Fast Ethernet
+ 10b8 2001 SMC9332BDT EtherPower 10/100
+ 10b8 2002 SMC9332BVT EtherPower T4 10/100
+ 10b8 2003 SMC9334BDT EtherPower 10/100 (1-port)
+ 1109 2400 ANA-6944A/TX Fast Ethernet
+ 1112 2300 RNS2300 Fast Ethernet
+ 1112 2320 RNS2320 Fast Ethernet
+ 1112 2340 RNS2340 Fast Ethernet
+ 1113 1207 EN-1207-TX Fast Ethernet
+ 1186 1100 DFE-500TX Fast Ethernet
+ 1186 1112 DFE-570TX Fast Ethernet
+ 1186 1140 DFE-660 Cardbus Ethernet 10/100
+ 1186 1142 DFE-660 Cardbus Ethernet 10/100
+ 11f6 0503 Freedomline Fast Ethernet
+ 1282 9100 AEF-380TXD Fast Ethernet
+ 1385 1100 FA310TX Fast Ethernet
+ 2646 0001 KNE100TX Fast Ethernet
+ 000a 21230 Video Codec
+ 000d PBXGB [TGA2]
+ 000f DEFPA
+ 0014 DECchip 21041 [Tulip Pass 3]
+ 1186 0100 DE-530+
+ 0016 DGLPB [OPPO]
+ 0019 DECchip 21142/43
+ 1011 500a DE500A Fast Ethernet
+ 1011 500b DE500B Fast Ethernet
+ 1014 0001 10/100 EtherJet Cardbus
+ 1025 0315 ALN315 Fast Ethernet
+ 1033 800c PC-9821-CS01
+ 1033 800d PC-9821NR-B06
+ 108d 0016 Rapidfire 2327 10/100 Ethernet
+ 108d 0017 GoCard 2250 Ethernet 10/100 Cardbus
+ 10b8 2005 SMC8032DT Extreme Ethernet 10/100
+ 10b8 8034 SMC8034 Extreme Ethernet 10/100
+ 10ef 8169 Cardbus Fast Ethernet
+ 1109 2a00 ANA-6911A/TX Fast Ethernet
+ 1109 2b00 ANA-6911A/TXC Fast Ethernet
+ 1109 3000 ANA-6922/TX Fast Ethernet
+ 1113 1207 Cheetah Fast Ethernet
+ 1113 2220 Cardbus Fast Ethernet
+ 115d 0002 Cardbus Ethernet 10/100
+ 1179 0203 Fast Ethernet
+ 1179 0204 Cardbus Fast Ethernet
+ 1186 1100 DFE-500TX Fast Ethernet
+ 1186 1101 DFE-500TX Fast Ethernet
+ 1186 1102 DFE-500TX Fast Ethernet
+ 1259 2800 AT-2800Tx Fast Ethernet
+ 1266 0004 Eagle Fast EtherMAX
+ 12af 0019 NetFlyer Cardbus Fast Ethernet
+ 1374 0001 Cardbus Ethernet Card 10/100
+ 1374 0002 Cardbus Ethernet Card 10/100
+ 1374 0007 Cardbus Ethernet Card 10/100
+ 1374 0008 Cardbus Ethernet Card 10/100
+ 1395 0001 10/100 Ethernet CardBus PC Card
+ 13d1 ab01 EtherFast 10/100 Cardbus (PCMPC200)
+ 8086 0001 EtherExpress PRO/100 Mobile CardBus 32
+ 001a Farallon PN9000SX
+ 0021 DECchip 21052
+ 0022 DECchip 21150
+ 0023 DECchip 21150
+ 0024 DECchip 21152
+ 0025 DECchip 21153
+ 0026 DECchip 21154
+ 0034 56k Modem Cardbus
+ 1374 0003 56k Modem Cardbus
+ 0045 DECchip 21553
+ 0046 DECchip 21554
+ 0e11 4050 Integrated Smart Array
+ 0e11 4051 Integrated Smart Array
+ 0e11 4058 Integrated Smart Array
+ 103c 10c2 Hewlett-Packard NetRAID-4M
+ 12d9 000a VoIP PCI Gateway
+ 9005 0365 Adaptec 5400S
+ 9005 1364 Dell PowerEdge RAID Controller 2
+ 9005 1365 Dell PowerEdge RAID Controller 2
+ e4bf 1000 CC8-1-BLUES
+ 1065 StrongARM DC21285
+ 1069 0020 DAC960P / DAC1164P
+1012 Micronics Computers Inc
+1013 Cirrus Logic
+ 0038 GD 7548
+ 0040 GD 7555 Flat Panel GUI Accelerator
+ 004c GD 7556 Video/Graphics LCD/CRT Ctrlr
+ 00a0 GD 5430/40 [Alpine]
+ 00a2 GD 5432 [Alpine]
+ 00a4 GD 5434-4 [Alpine]
+ 00a8 GD 5434-8 [Alpine]
+ 00ac GD 5436 [Alpine]
+ 00b0 GD 5440
+ 00b8 GD 5446
+ 00bc GD 5480
+ 1013 00bc CL-GD5480
+ 00d0 GD 5462
+ 00d2 GD 5462 [Laguna I]
+ 00d4 GD 5464 [Laguna]
+ 00d5 GD 5464 BD [Laguna]
+ 00d6 GD 5465 [Laguna]
+ 13ce 8031 Barco Metheus 2 Megapixel, Dual Head
+ 13cf 8031 Barco Metheus 2 Megapixel, Dual Head
+ 00e8 GD 5436U
+ 1100 CL 6729
+ 1110 PD 6832 PCMCIA/CardBus Ctrlr
+ 1112 PD 6834 PCMCIA/CardBus Ctrlr
+ 1113 PD 6833 PCMCIA/CardBus Ctrlr
+ 1200 GD 7542 [Nordic]
+ 1202 GD 7543 [Viking]
+ 1204 GD 7541 [Nordic Light]
+ 4400 CD 4400
+ 6001 CS 4610/11 [CrystalClear SoundFusion Audio Accelerator]
+ 1014 1010 CS4610 SoundFusion Audio Accelerator
+ 6003 CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator]
+ 1013 4280 Crystal SoundFusion PCI Audio Accelerator
+ 1681 0050 Hercules Game Theater XP
+ 1681 a011 Hercules Fortissimo III 7.1
+ 6004 CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator]
+ 6005 Crystal CS4281 PCI Audio
+ 1013 4281 Crystal CS4281 PCI Audio
+ 10cf 10a8 Crystal CS4281 PCI Audio
+ 10cf 10a9 Crystal CS4281 PCI Audio
+ 10cf 10aa Crystal CS4281 PCI Audio
+ 10cf 10ab Crystal CS4281 PCI Audio
+ 10cf 10ac Crystal CS4281 PCI Audio
+ 10cf 10ad Crystal CS4281 PCI Audio
+ 10cf 10b4 Crystal CS4281 PCI Audio
+ 1179 0001 Crystal CS4281 PCI Audio
+ 14c0 000c Crystal CS4281 PCI Audio
+1014 IBM
+ 0002 PCI to MCA Bridge
+ 0005 Alta Lite
+ 0007 Alta MP
+ 000a Fire Coral
+ 0017 CPU to PCI Bridge
+ 0018 TR Auto LANstreamer
+ 001b GXT-150P
+ 001c Carrera
+ 001d 82G2675
+ 0020 MCA
+ 0022 IBM27-82351
+ 002d Python
+ 002e ServeRAID-3x
+ 0036 Miami
+ 003a CPU to PCI Bridge
+ 003e 16/4 Token ring UTP/STP controller
+ 1014 003e Token-Ring Adapter
+ 1014 00cd Token-Ring Adapter + Wake-On-LAN
+ 1014 00ce 16/4 Token-Ring Adapter 2
+ 1014 00cf 16/4 Token-Ring Adapter Special
+ 1014 00e4 High-Speed 100/16/4 Token-Ring Adapter
+ 1014 00e5 16/4 Token-Ring Adapter 2 + Wake-On-LAN
+ 1014 016d iSeries 2744 Card
+ 0045 SSA Adapter
+ 0046 MPIC interrupt controller
+ 0047 PCI to PCI Bridge
+ 0048 PCI to PCI Bridge
+ 0049 Warhead SCSI Controller
+ 004e ATM Controller (14104e00)
+ 004f ATM Controller (14104f00)
+ 0050 ATM Controller (14105000)
+ 0053 25 MBit ATM Controller
+ 0057 MPEG PCI Bridge
+ 005c i82557B 10/100
+ 007c ATM Controller (14107c00)
+ 007d 3780IDSP [MWave]
+ 0090 GXT 3000P
+ 1014 008e GXT-3000P
+ 0095 20H2999 PCI Docking Bridge
+ 0096 Chukar chipset SCSI controller
+ 1014 0097 iSeries 2778 DASD IOA
+ 1014 0098 iSeries 2763 DASD IOA
+ 1014 0099 iSeries 2748 DASD IOA
+ 00a5 ATM Controller (1410a500)
+ 00a6 ATM 155MBPS MM Controller (1410a600)
+ 00b7 256-bit Graphics Rasterizer [Fire GL1]
+ 1902 00b8 Fire GL1
+ 00be ATM 622MBPS Controller (1410be00)
+ 00fc CPC710 Dual Bridge and Memory Controller (PCI-64)
+ 0105 CPC710 Dual Bridge and Memory Controller (PCI-32)
+ 0142 Yotta Video Compositor Input
+ 1014 0143 Yotta Input Controller (ytin)
+ 0144 Yotta Video Compositor Output
+ 1014 0145 Yotta Output Controller (ytout)
+ 0156 405GP PLB to PCI Bridge
+ 01a7 PCI-X to PCI-X Bridge
+ 01bd Netfinity ServeRAID controller
+ 01be ServeRAID-4M
+ 01bf ServeRAID-4L
+ 022e ServeRAID-4H
+ ffff MPIC-2 interrupt controller
+1015 LSI Logic Corp of Canada
+1016 ICL Personal Systems
+1017 SPEA Software AG
+ 5343 SPEA 3D Accelerator
+1018 Unisys Systems
+1019 Elitegroup Computer Systems
+101a AT&T GIS (NCR)
+ 0005 100VG ethernet
+101b Vitesse Semiconductor
+101c Western Digital
+ 0193 33C193A
+ 0196 33C196A
+ 0197 33C197A
+ 0296 33C296A
+ 3193 7193
+ 3197 7197
+ 3296 33C296A
+ 4296 34C296
+ 9710 Pipeline 9710
+ 9712 Pipeline 9712
+ c24a 90C
+101e American Megatrends Inc.
+ 1960 MegaRAID
+ 101e 0471 MegaRAID 471 Enterprise 1600 RAID Controller
+ 101e 0475 MegaRAID 475 Express 500 RAID Controller
+ 101e 0493 MegaRAID 493 Elite 1600 RAID Controller
+ 1028 0471 PowerEdge RAID Controller 3/QC
+ 1028 0475 PowerEdge RAID Controller 3/SC
+ 1028 0493 PowerEdge RAID Controller 3/DC
+ 1028 0511 PowerEdge Cost Effective RAID Controller ATA100/4Ch
+ 9010 MegaRAID 428 Ultra RAID Controller
+ 9030 EIDE Controller
+ 9031 EIDE Controller
+ 9032 EIDE & SCSI Controller
+ 9033 SCSI Controller
+ 9040 Multimedia card
+ 9060 MegaRAID 434 Ultra GT RAID Controller
+ 9063 MegaRAC
+ 101e 0767 Dell Remote Assistant Card 2
+101f PictureTel
+1020 Hitachi Computer Products
+1021 OKI Electric Industry Co. Ltd.
+1022 Advanced Micro Devices [AMD]
+ 2000 79c970 [PCnet LANCE]
+ 1014 2000 NetFinity 10/100 Fast Ethernet
+ 103c 104c Ethernet with LAN remote power Adapter
+ 103c 1064 Ethernet with LAN remote power Adapter
+ 103c 1065 Ethernet with LAN remote power Adapter
+ 103c 106c Ethernet with LAN remote power Adapter
+ 103c 106e Ethernet with LAN remote power Adapter
+ 103c 10ea Ethernet with LAN remote power Adapter
+ 1113 1220 EN1220 10/100 Fast Ethernet
+ 1259 2450 AT-2450 10/100 Fast Ethernet
+ 1259 2454 AT-2450v4 10Mb Ethernet Adapter
+ 1259 2700 AT-2700TX 10/100 Fast Ethernet
+ 1259 2701 AT-2700FX 100Mb Ethernet
+ 2001 79c978 [HomePNA]
+ 1092 0a78 Multimedia Home Network Adapter
+ 1668 0299 ActionLink Home Network Adapter
+ 2020 53c974 [PCscsi]
+ 2040 79c974
+ 3000 ELanSC520 Microcontroller
+ 7006 AMD-751 [Irongate] System Controller
+ 7007 AMD-751 [Irongate] AGP Bridge
+ 700c AMD-760 MP [IGD4-2P] System Controller
+ 700d AMD-760 MP [IGD4-2P] AGP Bridge
+ 700e AMD-760 [IGD4-1P] System Controller
+ 700f AMD-760 [IGD4-1P] AGP Bridge
+ 7400 AMD-755 [Cobra] ISA
+ 7401 AMD-755 [Cobra] IDE
+ 7403 AMD-755 [Cobra] ACPI
+ 7404 AMD-755 [Cobra] USB
+ 7408 AMD-756 [Viper] ISA
+ 7409 AMD-756 [Viper] IDE
+ 740b AMD-756 [Viper] ACPI
+ 740c AMD-756 [Viper] USB
+ 7410 AMD-766 [ViperPlus] ISA
+ 7411 AMD-766 [ViperPlus] IDE
+ 7413 AMD-766 [ViperPlus] ACPI
+ 7414 AMD-766 [ViperPlus] USB
+ 7440 AMD-768 [Opus] ISA
+ 1043 8044 A7M-D Mainboard
+ 7441 AMD-768 [Opus] IDE
+ 7443 AMD-768 [Opus] ACPI
+ 1043 8044 A7M-D Mainboard
+ 7445 AMD-768 [Opus] Audio
+ 7448 AMD-768 [Opus] PCI
+ 7449 AMD-768 [Opus] USB
+ 7454 AMD-8151 System Controller
+ 7455 AMD-8151 AGP Bridge
+ 7460 AMD-8111 PCI
+ 7461 AMD-8111 USB
+ 7462 AMD-8111 Ethernet
+ 7468 AMD-8111 LPC
+ 7469 AMD-8111 IDE
+ 746a AMD-8111 SMBus 2.0
+ 746b AMD-8111 ACPI
+ 746d AMD-8111 AC97 Audio
+ 756b AMD-8111 ACPI
+1023 Trident Microsystems
+ 0194 82C194
+ 2000 4DWave DX
+ 2001 4DWave NX
+ 8400 CyberBlade/i7
+ 1023 8400 CyberBlade i7 AGP
+ 8420 CyberBlade/i7d
+ 0e11 b15a CyberBlade i7 AGP
+ 8500 CyberBlade/i1
+ 8520 CyberBlade i1
+ 0e11 b16e CyberBlade i1 AGP
+ 1023 8520 CyberBlade i1 AGP
+ 8820 CyberBlade XPAi1
+ 9320 TGUI 9320
+ 9350 GUI Accelerator
+ 9360 Flat panel GUI Accelerator
+ 9382 Cyber 9382 [Reference design]
+ 9383 Cyber 9383 [Reference design]
+ 9385 Cyber 9385 [Reference design]
+ 9386 Cyber 9386
+ 9388 Cyber 9388
+ 9397 Cyber 9397
+ 939a Cyber 9397DVD
+ 9420 TGUI 9420
+ 9430 TGUI 9430
+ 9440 TGUI 9440
+ 9460 TGUI 9460
+ 9470 TGUI 9470
+ 9520 Cyber 9520
+ 9525 Cyber 9525
+ 10cf 1094 Lifebook C6155
+ 9540 Cyber 9540
+ 9660 TGUI 9660/938x/968x
+ 9680 TGUI 9680
+ 9682 TGUI 9682
+ 9683 TGUI 9683
+ 9685 ProVIDIA 9685
+ 9750 3DImage 9750
+ 1014 9750 3DImage 9750
+ 1023 9750 3DImage 9750
+ 9753 TGUI 9753
+ 9754 TGUI 9754
+ 9759 TGUI 975
+ 9783 TGUI 9783
+ 9785 TGUI 9785
+ 9850 3DImage 9850
+ 9880 Blade 3D PCI/AGP
+ 1023 9880 Blade 3D
+ 9910 CyberBlade/XP
+ 9930 CyberBlade/XPm
+1024 Zenith Data Systems
+1025 Acer Incorporated [ALI]
+ 1435 M1435
+ 1445 M1445
+ 1449 M1449
+ 1451 M1451
+ 1461 M1461
+ 1489 M1489
+ 1511 M1511
+ 1512 ALI M1512 Aladdin
+ 1513 M1513
+ 1521 ALI M1521 Aladdin III CPU Bridge
+ 10b9 1521 ALI M1521 Aladdin III CPU Bridge
+ 1523 ALI M1523 ISA Bridge
+ 10b9 1523 ALI M1523 ISA Bridge
+ 1531 M1531 Northbridge [Aladdin IV/IV+]
+ 1533 M1533 PCI-to-ISA Bridge
+ 10b9 1533 ALI M1533 Aladdin IV/V ISA South Bridge
+ 1535 M1535 PCI Bridge + Super I/O + FIR
+ 1541 M1541 Northbridge [Aladdin V]
+ 10b9 1541 ALI M1541 Aladdin V/V+ AGP+PCI North Bridge
+ 1542 M1542 Northbridge [Aladdin V]
+ 1543 M1543 PCI-to-ISA Bridge + Super I/O + FIR
+ 1561 M1561 Northbridge [Aladdin 7]
+ 1621 M1621 Northbridge [Aladdin-Pro II]
+ 1631 M1631 Northbridge+3D Graphics [Aladdin TNT2]
+ 1641 M1641 Northbridge [Aladdin-Pro IV]
+ 1647 M1647 [MaGiK1] PCI North Bridge
+ 3141 M3141
+ 3143 M3143
+ 3145 M3145
+ 3147 M3147
+ 3149 M3149
+ 3151 M3151
+ 3307 M3307 MPEG-I Video Controller
+ 3309 M3309 MPEG-II Video w/ Software Audio Decoder
+ 3321 M3321 MPEG-II Audio/Video Decoder
+ 5212 M4803
+ 5215 ALI PCI EIDE Controller
+ 5217 M5217H
+ 5219 M5219
+ 5225 M5225
+ 5229 M5229
+ 5235 M5235
+ 5237 M5237 PCI USB Host Controller
+ 5240 EIDE Controller
+ 5241 PCMCIA Bridge
+ 5242 General Purpose Controller
+ 5243 PCI to PCI Bridge Controller
+ 5244 Floppy Disk Controller
+ 5247 M1541 PCI to PCI Bridge
+ 5251 M5251 P1394 Controller
+ 5427 PCI to AGP Bridge
+ 5451 M5451 PCI AC-Link Controller Audio Device
+ 5453 M5453 PCI AC-Link Controller Modem Device
+ 7101 M7101 PCI PMU Power Management Controller
+ 10b9 7101 M7101 PCI PMU Power Management Controller
+1028 Dell Computer Corporation
+ 0001 PowerEdge Expandable RAID Controller 2/Si
+ 1028 0001 PowerEdge Expandable RAID Controller 2/Si
+ 0002 PowerEdge Expandable RAID Controller 3
+ 1028 0002 PowerEdge Expandable RAID Controller 3/Di
+ 1028 00d1 PowerEdge Expandable RAID Controller 3/Di
+ 1028 00d9 PowerEdge Expandable RAID Controller 3/Di
+ 0003 PowerEdge Expandable RAID Controller 3/Si
+ 1028 0003 PowerEdge Expandable RAID Controller 3/Si
+ 0004 PowerEdge Expandable RAID Controller 3/Si
+ 1028 00d0 PowerEdge Expandable RAID Controller 3/Si
+ 0005 PowerEdge Expandable RAID Controller 3/Di
+ 0006 PowerEdge Expandable RAID Controller 3/Di
+ 0007 Remote Assistant Card 3
+ 0008 PowerEdge Expandable RAID Controller 3/Di
+ 000a PowerEdge Expandable RAID Controller 3
+ 1027 0121 PowerEdge Expandable RAID Controller 3/Di
+ 1028 0106 PowerEdge Expandable RAID Controller 3/Di
+ 1028 011b PowerEdge Expandable RAID Controller 3/Di
+ 000c Embedded Systems Management Device 4
+ 000e PowerEdge Expandable RAID Controller
+ 000f PowerEdge Expandable RAID Controller 4/Di
+1029 Siemens Nixdorf IS
+102a LSI Logic
+ 0000 HYDRA
+ 0010 ASPEN
+102b Matrox Graphics, Inc.
+# DJ: I've a suspicion that 0010 is a duplicate of 0d10.
+ 0010 MGA-I [Impression?]
+ 0518 MGA-II [Athena]
+ 0519 MGA 2064W [Millennium]
+ 051a MGA 1064SG [Mystique]
+ 102b 1100 MGA-1084SG Mystique
+ 102b 1200 MGA-1084SG Mystique
+ 1100 102b MGA-1084SG Mystique
+ 110a 0018 Scenic Pro C5 (D1025)
+ 051b MGA 2164W [Millennium II]
+ 102b 051b MGA-2164W Millennium II
+ 102b 1100 MGA-2164W Millennium II
+ 102b 1200 MGA-2164W Millennium II
+ 051e MGA 1064SG [Mystique] AGP
+ 051f MGA 2164W [Millennium II] AGP
+ 0520 MGA G200
+ 102b dbc2 G200 Multi-Monitor
+ 102b dbc8 G200 Multi-Monitor
+ 102b dbe2 G200 Multi-Monitor
+ 102b dbe8 G200 Multi-Monitor
+ 102b ff03 Millennium G200 SD
+ 102b ff04 Marvel G200
+ 0521 MGA G200 AGP
+ 1014 ff03 Millennium G200 AGP
+ 102b 48e9 Mystique G200 AGP
+ 102b 48f8 Millennium G200 SD AGP
+ 102b 4a60 Millennium G200 LE AGP
+ 102b 4a64 Millennium G200 AGP
+ 102b c93c Millennium G200 AGP
+ 102b c9b0 Millennium G200 AGP
+ 102b c9bc Millennium G200 AGP
+ 102b ca60 Millennium G250 LE AGP
+ 102b ca6c Millennium G250 AGP
+ 102b dbbc Millennium G200 AGP
+ 102b dbc2 Millennium G200 MMS (Dual G200)
+ 102b dbc3 G200 Multi-Monitor
+ 102b dbc8 Millennium G200 MMS (Dual G200)
+ 102b dbd2 G200 Multi-Monitor
+ 102b dbd3 G200 Multi-Monitor
+ 102b dbd4 G200 Multi-Monitor
+ 102b dbd5 G200 Multi-Monitor
+ 102b dbd8 G200 Multi-Monitor
+ 102b dbd9 G200 Multi-Monitor
+ 102b dbe2 Millennium G200 MMS (Quad G200)
+ 102b dbe3 G200 Multi-Monitor
+ 102b dbe8 Millennium G200 MMS (Quad G200)
+ 102b dbf2 G200 Multi-Monitor
+ 102b dbf3 G200 Multi-Monitor
+ 102b dbf4 G200 Multi-Monitor
+ 102b dbf5 G200 Multi-Monitor
+ 102b dbf8 G200 Multi-Monitor
+ 102b dbf9 G200 Multi-Monitor
+ 102b f806 Mystique G200 Video AGP
+ 102b ff00 MGA-G200 AGP
+ 102b ff02 Mystique G200 AGP
+ 102b ff03 Millennium G200 AGP
+ 102b ff04 Marvel G200 AGP
+ 110a 0032 MGA-G200 AGP
+ 0525 MGA G400 AGP
+ 0e11 b16f MGA-G400 AGP
+ 102b 0328 Millennium G400 16Mb SDRAM
+ 102b 0338 Millennium G400 16Mb SDRAM
+ 102b 0378 Millennium G400 32Mb SDRAM
+ 102b 0541 Millennium G450 Dual Head
+ 102b 0542 Millennium G450 Dual Head LX
+ 102b 0543 Millennium G450 Single Head LX
+ 102b 0641 Millennium G450 32Mb SDRAM Dual Head
+ 102b 0642 Millennium G450 32Mb SDRAM Dual Head LX
+ 102b 0643 Millennium G450 32Mb SDRAM Single Head LX
+ 102b 07c0 Millennium G450 Dual Head LE
+ 102b 07c1 Millennium G450 SDR Dual Head LE
+ 102b 0d41 Millennium G450 Dual Head PCI
+ 102b 0d42 Millennium G450 Dual Head LX PCI
+ 102b 0e00 Marvel G450 eTV
+ 102b 0e01 Marvel G450 eTV
+ 102b 0e02 Marvel G450 eTV
+ 102b 0e03 Marvel G450 eTV
+ 102b 0f80 Millennium G450 Low Profile
+ 102b 0f81 Millennium G450 Low Profile
+ 102b 0f82 Millennium G450 Low Profile DVI
+ 102b 0f83 Millennium G450 Low Profile DVI
+ 102b 19d8 Millennium G400 16Mb SGRAM
+ 102b 19f8 Millennium G400 32Mb SGRAM
+ 102b 2159 Millennium G400 Dual Head 16Mb
+ 102b 2179 Millennium G400 MAX/Dual Head 32Mb
+ 102b 217d Millennium G400 Dual Head Max
+ 102b 23c0 Millennium G450
+ 102b 23c1 Millennium G450
+ 102b 23c2 Millennium G450 DVI
+ 102b 23c3 Millennium G450 DVI
+ 102b 2f58 Millennium G400
+ 102b 2f78 Millennium G400
+ 102b 3693 Marvel G400 AGP
+ 102b 5dd0 4Sight II
+ 102b 5f50 4Sight II
+ 102b 5f51 4Sight II
+ 102b 5f52 4Sight II
+ 102b 9010 Millennium G400 Dual Head
+ 1458 0400 GA-G400
+ 1705 0001 Digital First Millennium G450 32MB SGRAM
+ 1705 0002 Digital First Millennium G450 16MB SGRAM
+ 1705 0003 Digital First Millennium G450 32MB
+ 1705 0004 Digital First Millennium G450 16MB
+ b16f 0e11 MGA-G400 AGP
+ 0527 MGA Parhelia AGP
+ 102b 0840 Parhelia 128Mb
+ 0d10 MGA Ultima/Impression
+ 1000 MGA G100 [Productiva]
+ 102b ff01 Productiva G100
+ 102b ff05 Productiva G100 Multi-Monitor
+ 1001 MGA G100 [Productiva] AGP
+ 102b 1001 MGA-G100 AGP
+ 102b ff00 MGA-G100 AGP
+ 102b ff01 MGA-G100 Productiva AGP
+ 102b ff03 Millennium G100 AGP
+ 102b ff04 MGA-G100 AGP
+ 102b ff05 MGA-G100 Productiva AGP Multi-Monitor
+ 110a 001e MGA-G100 AGP
+ 2007 MGA Mistral
+ 2527 MGA G550 AGP
+ 102b 0f83 Millennium G550
+ 102b 0f84 Millennium G550 Dual Head DDR 32Mb
+ 102b 1e41 Millennium G550
+ 4536 VIA Framegrabber
+ 6573 Shark 10/100 Multiport SwitchNIC
+102c Chips and Technologies
+ 00b8 F64310
+ 00c0 F69000 HiQVideo
+ 102c 00c0 F69000 HiQVideo
+ 00d0 F65545
+ 00d8 F65545
+ 00dc F65548
+ 00e0 F65550
+ 00e4 F65554
+ 00e5 F65555 HiQVPro
+ 0e11 b049 Armada 1700 Laptop Display Controller
+ 00f0 F68554
+ 00f4 F68554 HiQVision
+ 00f5 F68555
+ 0c30 F69030
+102d Wyse Technology Inc.
+ 50dc 3328 Audio
+102e Olivetti Advanced Technology
+102f Toshiba America
+ 0009 r4x00
+ 0020 ATM Meteor 155
+ 102f 00f8 ATM Meteor 155
+1030 TMC Research
+1031 Miro Computer Products AG
+ 5601 DC20 ASIC
+ 5607 Video I/O & motion JPEG compressor
+ 5631 Media 3D
+ 6057 MiroVideo DC10/DC30+
+1032 Compaq
+1033 NEC Corporation
+ 0001 PCI to 486-like bus Bridge
+ 0002 PCI to VL98 Bridge
+ 0003 ATM Controller
+ 0004 R4000 PCI Bridge
+ 0005 PCI to 486-like bus Bridge
+ 0006 GUI Accelerator
+ 0007 PCI to UX-Bus Bridge
+ 0008 GUI Accelerator
+ 0009 GUI Accelerator for W98
+ 001a [Nile II]
+ 0021 Vrc4373 [Nile I]
+ 0029 PowerVR PCX1
+ 002a PowerVR 3D
+ 0035 USB
+ 1179 0001 USB
+ 12ee 7000 Root Hub
+ 003e NAPCCARD Cardbus Controller
+ 0046 PowerVR PCX2 [midas]
+ 005a Vrc5074 [Nile 4]
+ 0063 Firewarden
+ 0067 PowerVR Neon 250 Chipset
+ 1010 0020 PowerVR Neon 250 AGP 32Mb
+ 1010 0080 PowerVR Neon 250 AGP 16Mb
+ 1010 0088 PowerVR Neon 250 16Mb
+ 1010 0090 PowerVR Neon 250 AGP 16Mb
+ 1010 0098 PowerVR Neon 250 16Mb
+ 1010 00a0 PowerVR Neon 250 AGP 32Mb
+ 1010 00a8 PowerVR Neon 250 32Mb
+ 1010 0120 PowerVR Neon 250 AGP 32Mb
+ 0074 56k Voice Modem
+ 1033 8014 RCV56ACF 56k Voice Modem
+ 009b Vrc5476
+ 00a6 VRC5477 AC97
+ 00cd IEEE 1394 [OrangeLink] Host Controller
+ 12ee 8011 Root hub
+ 00e0 USB 2.0
+ 12ee 7001 Root hub
+1034 Framatome Connectors USA Inc.
+1035 Comp. & Comm. Research Lab
+1036 Future Domain Corp.
+ 0000 TMC-18C30 [36C70]
+1037 Hitachi Micro Systems
+1038 AMP, Inc
+1039 Silicon Integrated Systems [SiS]
+ 0001 5591/5592 AGP
+ 0002 SG86C202
+ 0006 85C501/2/3
+ 0008 85C503/5513
+ 0009 ACPI
+ 0018 SiS85C503/5513 (LPC Bridge)
+ 0200 5597/5598/6326 VGA
+ 1039 0000 SiS5597 SVGA (Shared RAM)
+ 0204 82C204
+ 0205 SG86C205
+ 0300 300/200
+ 107d 2720 Leadtek WinFast VR300
+ 0406 85C501/2
+ 0496 85C496
+ 0530 530 Host
+ 0540 540 Host
+ 0597 5513C
+ 0601 85C601
+ 0620 620 Host
+ 0630 630 Host
+ 0633 633 Host
+ 0635 635 Host
+ 0645 645 Host
+ 0646 645DX Host
+ 0650 650 Host
+ 0730 730 Host
+ 0733 733 Host
+ 0735 735 Host
+ 0740 740 Host
+ 0745 745 Host
+ 0900 SiS900 10/100 Ethernet
+ 1039 0900 SiS900 10/100 Ethernet Adapter
+ 0961 SiS961 [MuTIOL Media IO]
+ 3602 83C602
+ 5107 5107
+ 5300 SiS540 PCI Display Adapter
+ 5401 486 PCI Chipset
+ 5511 5511/5512
+ 5513 5513 [IDE]
+ 1039 5513 SiS5513 EIDE Controller (A,B step)
+ 5517 5517
+ 5571 5571
+ 5581 5581 Pentium Chipset
+ 5582 5582
+ 5591 5591/5592 Host
+ 5596 5596 Pentium Chipset
+ 5597 5597 [SiS5582]
+ 5600 5600 Host
+ 6204 Video decoder & MPEG interface
+ 6205 VGA Controller
+ 6236 6236 3D-AGP
+ 6300 SiS630 GUI Accelerator+3D
+ 6306 6306 3D-AGP
+ 1039 6306 SiS530,620 GUI Accelerator+3D
+ 6326 86C326 5598/6326
+ 1039 6326 SiS6326 GUI Accelerator
+ 1092 0a50 SpeedStar A50
+ 1092 0a70 SpeedStar A70
+ 1092 4910 SpeedStar A70
+ 1092 4920 SpeedStar A70
+ 1569 6326 SiS6326 GUI Accelerator
+ 7001 7001
+ 7007 FireWire Controller
+ 7012 SiS7012 PCI Audio Accelerator
+ 7013 56k Winmodem (Smart Link HAMR5600 compatible)
+ 7016 SiS7016 10/100 Ethernet Adapter
+ 1039 7016 SiS7016 10/100 Ethernet Adapter
+ 7018 SiS PCI Audio Accelerator
+ 1014 01b6 SiS PCI Audio Accelerator
+ 1014 01b7 SiS PCI Audio Accelerator
+ 1019 7018 SiS PCI Audio Accelerator
+ 1025 000e SiS PCI Audio Accelerator
+ 1025 0018 SiS PCI Audio Accelerator
+ 1039 7018 SiS PCI Audio Accelerator
+ 1043 800b SiS PCI Audio Accelerator
+ 1054 7018 SiS PCI Audio Accelerator
+ 107d 5330 SiS PCI Audio Accelerator
+ 107d 5350 SiS PCI Audio Accelerator
+ 1170 3209 SiS PCI Audio Accelerator
+ 1462 400a SiS PCI Audio Accelerator
+ 14a4 2089 SiS PCI Audio Accelerator
+ 14cd 2194 SiS PCI Audio Accelerator
+ 14ff 1100 SiS PCI Audio Accelerator
+ 152d 8808 SiS PCI Audio Accelerator
+ 1558 1103 SiS PCI Audio Accelerator
+ 1558 2200 SiS PCI Audio Accelerator
+ 1563 7018 SiS PCI Audio Accelerator
+ 15c5 0111 SiS PCI Audio Accelerator
+ 270f a171 SiS PCI Audio Accelerator
+ a0a0 0022 SiS PCI Audio Accelerator
+103a Seiko Epson Corporation
+103b Tatung Co. of America
+103c Hewlett-Packard Company
+ 1005 A4977A Visualize EG
+ 1006 Visualize FX6
+ 1008 Visualize FX4
+ 100a Visualize FX2
+ 1028 Tach TL Fibre Channel Host Adapter
+ 1029 Tach XL2 Fibre Channel Host Adapter
+ 107e 000f Interphase 5560 Fibre Channel Adapter
+ 9004 9210 1Gb/2Gb Family Fibre Channel Controller
+ 9004 9211 1Gb/2Gb Family Fibre Channel Controller
+ 102a Tach TS Fibre Channel Host Adapter
+ 107e 000e Interphase 5540/5541 Fibre Channel Adapter
+ 9004 9110 1Gb/2Gb Family Fibre Channel Controller
+ 9004 9111 1Gb/2Gb Family Fibre Channel Controller
+ 1030 J2585A DeskDirect 10/100VG NIC
+ 1031 J2585B HP 10/100VG PCI LAN Adapter
+ 103c 1040 J2973A DeskDirect 10BaseT NIC
+ 103c 1041 J2585B DeskDirect 10/100VG NIC
+ 103c 1042 J2970A DeskDirect 10BaseT/2 NIC
+ 1040 J2973A DeskDirect 10BaseT NIC
+ 1041 J2585B DeskDirect 10/100 NIC
+ 1042 J2970A DeskDirect 10BaseT/2 NIC
+ 1048 Diva Serial [GSP] Multiport UART
+ 103c 1049 Tosca Console
+ 103c 104a Tosca Secondary
+ 103c 104b Maestro SP2
+ 103c 1223 Halfdome Console
+ 103c 1226 Keystone SP2
+ 103c 1227 Powerbar SP2
+ 103c 1282 Everest SP2
+ 1064 79C970 PCnet Ethernet Controller
+ 108b Visualize FXe
+ 10c1 NetServer Smart IRQ Router
+ 10ed TopTools Remote Control
+ 1200 82557B 10/100 NIC
+ 1219 NetServer PCI Hot-Plug Controller
+ 121a NetServer SMIC Controller
+ 121b NetServer Legacy COM Port Decoder
+ 121c NetServer PCI COM Port Decoder
+ 1229 zx1 System Bus Adapter
+ 122a zx1 I/O Controller
+ 122e zx1 Local Bus Adapter
+ 1290 Auxiliary Diva Serial Port
+ 2910 E2910A PCIBus Exerciser
+ 2925 E2925A 32 Bit, 33 MHzPCI Exerciser & Analyzer
+103e Solliday Engineering
+103f Synopsys/Logic Modeling Group
+1040 Accelgraphics Inc.
+1041 Computrend
+1042 Micron
+ 1000 FDC 37C665
+ 1001 37C922
+ 3000 Samurai_0
+ 3010 Samurai_1
+ 3020 Samurai_IDE
+1043 Asustek Computer, Inc.
+ 0675 ISDNLink P-IN100-ST-D
+ 4057 V8200 GeForce 3
+1044 Distributed Processing Technology
+ 1012 Domino RAID Engine
+ a400 SmartCache/Raid I-IV Controller
+ a500 PCI Bridge
+ a501 SmartRAID V Controller
+ 1044 c001 PM1554U2 Ultra2 Single Channel
+ 1044 c002 PM1654U2 Ultra2 Single Channel
+ 1044 c003 PM1564U3 Ultra3 Single Channel
+ 1044 c004 PM1564U3 Ultra3 Dual Channel
+ 1044 c005 PM1554U2 Ultra2 Single Channel (NON ACPI)
+ 1044 c00a PM2554U2 Ultra2 Single Channel
+ 1044 c00b PM2654U2 Ultra2 Single Channel
+ 1044 c00c PM2664U3 Ultra3 Single Channel
+ 1044 c00d PM2664U3 Ultra3 Dual Channel
+ 1044 c00e PM2554U2 Ultra2 Single Channel (NON ACPI)
+ 1044 c00f PM2654U2 Ultra2 Single Channel (NON ACPI)
+ 1044 c014 PM3754U2 Ultra2 Single Channel (NON ACPI)
+ 1044 c015 PM3755U2B Ultra2 Single Channel (NON ACPI)
+ 1044 c016 PM3755F Fibre Channel (NON ACPI)
+ 1044 c01e PM3757U2 Ultra2 Single Channel
+ 1044 c01f PM3757U2 Ultra2 Dual Channel
+ 1044 c020 PM3767U3 Ultra3 Dual Channel
+ 1044 c021 PM3767U3 Ultra3 Quad Channel
+ 1044 c028 PM2865U3 Ultra3 Single Channel
+ 1044 c029 PM2865U3 Ultra3 Dual Channel
+ 1044 c02a PM2865F Fibre Channel
+ 1044 c03c 2000S Ultra3 Single Channel
+ 1044 c03d 2000S Ultra3 Dual Channel
+ 1044 c03e 2000F Fibre Channel
+ 1044 c046 3000S Ultra3 Single Channel
+ 1044 c047 3000S Ultra3 Dual Channel
+ 1044 c048 3000F Fibre Channel
+ 1044 c050 5000S Ultra3 Single Channel
+ 1044 c051 5000S Ultra3 Dual Channel
+ 1044 c052 5000F Fibre Channel
+ 1044 c05a 2400A UDMA Four Channel
+ 1044 c05b 2400A UDMA Four Channel DAC
+ 1044 c064 3010S Ultra3 Dual Channel
+ 1044 c065 3010S Ultra3 Four Channel
+ 1044 c066 3010S Fibre Channel
+ a511 SmartRAID V Controller
+1045 OPTi Inc.
+ a0f8 82C750 [Vendetta] USB Controller
+ c101 92C264
+ c178 92C178
+ c556 82X556 [Viper]
+ c557 82C557 [Viper-M]
+ c558 82C558 [Viper-M ISA+IDE]
+ c567 82C750 [Vendetta], device 0
+ c568 82C750 [Vendetta], device 1
+ c569 82C579 [Viper XPress+ Chipset]
+ c621 82C621 [Viper-M/N+]
+ c700 82C700 [FireStar]
+ c701 82C701 [FireStar Plus]
+ c814 82C814 [Firebridge 1]
+ c822 82C822
+ c824 82C824
+ c825 82C825 [Firebridge 2]
+ c832 82C832
+ c861 82C861
+ c895 82C895
+ c935 EV1935 ECTIVA MachOne PCI Audio
+ d568 82C825 [Firebridge 2]
+ d721 IDE [FireStar]
+1046 IPC Corporation, Ltd.
+1047 Genoa Systems Corp
+1048 Elsa AG
+ 0d22 Quadro4 900XGL [ELSA GLoria4 900XGL]
+ 1000 QuickStep 1000
+ 3000 QuickStep 3000
+1049 Fountain Technologies, Inc.
+104a SGS Thomson Microelectronics
+ 0008 STG 2000X
+ 0009 STG 1764X
+ 0981 DEC-Tulip compatible 10/100 Ethernet
+ 1746 STG 1764X
+ 2774 DEC-Tulip compatible 10/100 Ethernet
+ 3520 MPEG-II decoder card
+104b BusLogic
+ 0140 BT-946C (old) [multimaster 01]
+ 1040 BT-946C (BA80C30) [MultiMaster 10]
+ 8130 Flashpoint LT
+104c Texas Instruments
+ 0500 100 MBit LAN Controller
+ 0508 TMS380C2X Compressor Interface
+ 1000 Eagle i/f AS
+ 3d04 TVP4010 [Permedia]
+ 3d07 TVP4020 [Permedia 2]
+ 1011 4d10 Comet
+ 1040 000f AccelStar II
+ 1040 0011 AccelStar II
+ 1048 0a31 WINNER 2000
+ 1048 0a32 GLoria Synergy
+ 1048 0a35 GLoria Synergy
+ 107d 2633 WinFast 3D L2300
+ 1092 0127 FIRE GL 1000 PRO
+ 1092 0136 FIRE GL 1000 PRO
+ 1092 0141 FIRE GL 1000 PRO
+ 1092 0146 FIRE GL 1000 PRO
+ 1092 0148 FIRE GL 1000 PRO
+ 1092 0149 FIRE GL 1000 PRO
+ 1092 0152 FIRE GL 1000 PRO
+ 1092 0154 FIRE GL 1000 PRO
+ 1092 0155 FIRE GL 1000 PRO
+ 1092 0156 FIRE GL 1000 PRO
+ 1092 0157 FIRE GL 1000 PRO
+ 1097 3d01 Jeronimo Pro
+ 1102 100f Graphics Blaster Extreme
+ 3d3d 0100 Reference Permedia 2 3D
+ 8000 PCILynx/PCILynx2 IEEE 1394 Link Layer Controller
+ e4bf 1010 CF1-1-SNARE
+ e4bf 1020 CF1-2-SNARE
+ 8009 FireWire Controller
+ 104d 8032 8032 OHCI i.LINK (IEEE 1394) Controller
+ 8017 PCI4410 FireWire Controller
+ 8019 TSB12LV23 IEEE-1394 Controller
+ 11bd 000a Studio DV500-1394
+ 11bd 000e Studio DV
+ e4bf 1010 CF2-1-CYMBAL
+ 8020 TSB12LV26 IEEE-1394 Controller (Link)
+ 8021 TSB43AA22 IEEE-1394 Controller (PHY/Link Integrated)
+ 104d 80df Vaio PCG-FX403
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 8022 TSB43AB22 IEEE-1394a-2000 Controller (PHY/Link)
+ 8023 TSB43AB22/A IEEE-1394a-2000 Controller (PHY/Link)
+ 8024 TSB43AB23 IEEE-1394a-2000 Controller (PHY/Link)
+ 8026 TSB43AB21 IEEE-1394a-2000 Controller (PHY/Link)
+ 8027 PCI4451 IEEE-1394 Controller
+ 1028 00e6 PCI4451 IEEE-1394 Controller (Dell Inspiron 8100)
+ a001 TDC1570
+ a100 TDC1561
+ a102 TNETA1575 HyperSAR Plus w/PCI Host i/f & UTOPIA i/f
+ ac10 PCI1050
+ ac11 PCI1053
+ ac12 PCI1130
+ ac13 PCI1031
+ ac15 PCI1131
+ ac16 PCI1250
+ ac17 PCI1220
+ ac18 PCI1260
+ ac19 PCI1221
+ ac1a PCI1210
+ ac1b PCI1450
+ ac1c PCI1225
+ ac1d PCI1251A
+ ac1e PCI1211
+ ac1f PCI1251B
+ ac20 TI 2030
+ ac21 PCI2031
+ ac22 PCI2032 PCI Docking Bridge
+ ac23 PCI2250 PCI-to-PCI Bridge
+ ac28 PCI2050 PCI-to-PCI Bridge
+ ac30 PCI1260 PC card Cardbus Controller
+ ac40 PCI4450 PC card Cardbus Controller
+ ac41 PCI4410 PC card Cardbus Controller
+ ac42 PCI4451 PC card Cardbus Controller
+ 1028 00e6 PCI4451 PC card CardBus Controller (Dell Inspiron 8100)
+ ac50 PCI1410 PC card Cardbus Controller
+ ac51 PCI1420
+ 1014 023b ThinkPad T23 (2647-4MG)
+ 10cf 1095 Lifebook C6155
+ e4bf 1000 CP2-2-HIPHOP
+ ac52 PCI1451 PC card Cardbus Controller
+ ac53 PCI1421 PC card Cardbus Controller
+ ac55 PCI1250 PC card Cardbus Controller
+ ac60 PCI2040 PCI to DSP Bridge Controller
+ fe00 FireWire Host Controller
+ fe03 12C01A FireWire Host Controller
+104d Sony Corporation
+ 8009 CXD1947Q i.LINK Controller
+ 8039 CXD3222 i.LINK Controller
+ 8056 Rockwell HCF 56K modem
+ 808a Memory Stick Controller
+104e Oak Technology, Inc
+ 0017 OTI-64017
+ 0107 OTI-107 [Spitfire]
+ 0109 Video Adapter
+ 0111 OTI-64111 [Spitfire]
+ 0217 OTI-64217
+ 0317 OTI-64317
+104f Co-time Computer Ltd
+1050 Winbond Electronics Corp
+ 0000 NE2000
+ 0001 W83769F
+ 0105 W82C105
+ 0840 W89C840
+ 1050 0001 W89C840 Ethernet Adapter
+ 1050 0840 W89C840 Ethernet Adapter
+ 0940 W89C940
+ 5a5a W89C940F
+ 9970 W9970CF
+1051 Anigma, Inc.
+1052 ?Young Micro Systems
+1053 Young Micro Systems
+1054 Hitachi, Ltd
+1055 Efar Microsystems
+ 9130 SLC90E66 [Victory66] IDE
+ 9460 SLC90E66 [Victory66] ISA
+ 9462 SLC90E66 [Victory66] USB
+ 9463 SLC90E66 [Victory66] ACPI
+1056 ICL
+# Motorola made a mistake and used 1507 instead of 1057 in some chips. Please look at the 1507 entry as well when updating this.
+1057 Motorola
+ 0001 MPC105 [Eagle]
+ 0002 MPC106 [Grackle]
+ 0003 MPC8240 [Kahlua]
+ 0100 MC145575 [HFC-PCI]
+ 0431 KTI829c 100VG
+ 1801 Audio I/O Controller (MIDI)
+ ecc0 0030 Layla
+ 18c0 MPC8265A/MPC8266
+ 4801 Raven
+ 4802 Falcon
+ 4803 Hawk
+ 4806 CPX8216
+ 4d68 20268
+ 5600 SM56 PCI Modem
+ 1057 0300 SM56 PCI Speakerphone Modem
+ 1057 0301 SM56 PCI Voice Modem
+ 1057 0302 SM56 PCI Fax Modem
+ 1057 5600 SM56 PCI Voice modem
+ 13d2 0300 SM56 PCI Speakerphone Modem
+ 13d2 0301 SM56 PCI Voice modem
+ 13d2 0302 SM56 PCI Fax Modem
+ 1436 0300 SM56 PCI Speakerphone Modem
+ 1436 0301 SM56 PCI Voice modem
+ 1436 0302 SM56 PCI Fax Modem
+ 144f 100c SM56 PCI Fax Modem
+ 1494 0300 SM56 PCI Speakerphone Modem
+ 1494 0301 SM56 PCI Voice modem
+ 14c8 0300 SM56 PCI Speakerphone Modem
+ 14c8 0302 SM56 PCI Fax Modem
+ 1668 0300 SM56 PCI Speakerphone Modem
+ 1668 0302 SM56 PCI Fax Modem
+ 6400 MPC190 Security Processor (S1 family, encryption)
+1058 Electronics & Telecommunications RSH
+1059 Teknor Industrial Computers Inc
+105a Promise Technology, Inc.
+ 0d30 20265
+ 105a 4d33 Ultra100
+ 0d38 20263
+ 105a 4d39 Fasttrak66
+ 1275 20275
+ 4d30 20267
+ 105a 4d33 Ultra100
+ 105a 4d39 Fasttrak100
+ 4d33 20246
+ 105a 4d33 20246 IDE Controller
+ 4d38 20262
+ 105a 4d30 Ultra Device on SuperTrak
+ 105a 4d33 Ultra66
+ 105a 4d39 Fasttrak66
+ 4d68 20268
+ 105a 4d68 Ultra100TX2
+ 4d69 20269
+ 5275 PDC20276 IDE
+ 105a 0275 SuperTrak SX6000 IDE
+ 5300 DC5300
+ 6268 20268R
+ 6269 PDC20271
+ 105a 6269 Fasttrak TX2
+ 7275 PDC20277
+105b Foxconn International, Inc.
+105c Wipro Infotech Limited
+105d Number 9 Computer Company
+ 2309 Imagine 128
+ 2339 Imagine 128-II
+ 105d 0000 Imagine 128 series 2 4Mb VRAM
+ 105d 0001 Imagine 128 series 2 4Mb VRAM
+ 105d 0002 Imagine 128 series 2 4Mb VRAM
+ 105d 0003 Imagine 128 series 2 4Mb VRAM
+ 105d 0004 Imagine 128 series 2 4Mb VRAM
+ 105d 0005 Imagine 128 series 2 4Mb VRAM
+ 105d 0006 Imagine 128 series 2 4Mb VRAM
+ 105d 0007 Imagine 128 series 2 4Mb VRAM
+ 105d 0008 Imagine 128 series 2e 4Mb DRAM
+ 105d 0009 Imagine 128 series 2e 4Mb DRAM
+ 105d 000a Imagine 128 series 2 8Mb VRAM
+ 105d 000b Imagine 128 series 2 8Mb H-VRAM
+ 11a4 000a Barco Metheus 5 Megapixel
+ 13cc 0000 Barco Metheus 5 Megapixel
+ 13cc 0004 Barco Metheus 5 Megapixel
+ 13cc 0005 Barco Metheus 5 Megapixel
+ 13cc 0006 Barco Metheus 5 Megapixel
+ 13cc 0008 Barco Metheus 5 Megapixel
+ 13cc 0009 Barco Metheus 5 Megapixel
+ 13cc 000a Barco Metheus 5 Megapixel
+ 13cc 000c Barco Metheus 5 Megapixel
+ 493d Imagine 128 T2R [Ticket to Ride]
+ 11a4 000a Barco Metheus 5 Megapixel, Dual Head
+ 11a4 000b Barco Metheus 5 Megapixel, Dual Head
+ 13cc 0002 Barco Metheus 4 Megapixel, Dual Head
+ 13cc 0003 Barco Metheus 5 Megapixel, Dual Head
+ 13cc 0007 Barco Metheus 5 Megapixel, Dual Head
+ 13cc 0008 Barco Metheus 5 Megapixel, Dual Head
+ 13cc 0009 Barco Metheus 5 Megapixel, Dual Head
+ 13cc 000a Barco Metheus 5 Megapixel, Dual Head
+ 5348 Revolution 4
+105e Vtech Computers Ltd
+105f Infotronic America Inc
+1060 United Microelectronics [UMC]
+ 0001 UM82C881
+ 0002 UM82C886
+ 0101 UM8673F
+ 0881 UM8881
+ 0886 UM8886F
+ 0891 UM8891A
+ 1001 UM886A
+ 673a UM8886BF
+ 673b EIDE Master/DMA
+ 8710 UM8710
+ 886a UM8886A
+ 8881 UM8881F
+ 8886 UM8886F
+ 888a UM8886A
+ 8891 UM8891A
+ 9017 UM9017F
+ 9018 UM9018
+ 9026 UM9026
+ e881 UM8881N
+ e886 UM8886N
+ e88a UM8886N
+ e891 UM8891N
+1061 I.I.T.
+ 0001 AGX016
+ 0002 IIT3204/3501
+1062 Maspar Computer Corp
+1063 Ocean Office Automation
+1064 Alcatel
+1065 Texas Microsystems
+1066 PicoPower Technology
+ 0000 PT80C826
+ 0001 PT86C521 [Vesuvius v1] Host Bridge
+ 0002 PT86C523 [Vesuvius v3] PCI-ISA Bridge Master
+ 0003 PT86C524 [Nile] PCI-to-PCI Bridge
+ 0004 PT86C525 [Nile-II] PCI-to-PCI Bridge
+ 0005 National PC87550 System Controller
+ 8002 PT86C523 [Vesuvius v3] PCI-ISA Bridge Slave
+1067 Mitsubishi Electric
+ 1002 VG500 [VolumePro Volume Rendering Accelerator]
+1068 Diversified Technology
+1069 Mylex Corporation
+ 0001 DAC960P
+ 0002 DAC960PD
+ 0010 DAC960PX
+ 0050 AcceleRAID 352/170/160 support Device
+ ba55 eXtremeRAID 1100 support Device
+ ba56 eXtremeRAID 2000/3000 support Device
+106a Aten Research Inc
+106b Apple Computer Inc.
+ 0001 Bandit PowerPC host bridge
+ 0002 Grand Central I/O
+ 0003 Control Video
+ 0004 PlanB Video-In
+ 0007 O'Hare I/O
+ 000e Hydra Mac I/O
+ 0010 Heathrow Mac I/O
+ 0017 Paddington Mac I/O
+ 0018 UniNorth FireWire
+ 0019 KeyLargo USB
+ 001e UniNorth Internal PCI
+ 001f UniNorth PCI
+ 0020 UniNorth AGP
+ 0021 UniNorth GMAC (Sun GEM)
+ 0022 KeyLargo Mac I/O
+ 0024 UniNorth/Pangea GMAC (Sun GEM)
+ 0025 KeyLargo/Pangea Mac I/O
+ 0026 KeyLargo/Pangea USB
+ 0027 UniNorth/Pangea AGP
+ 0028 UniNorth/Pangea PCI
+ 0029 UniNorth/Pangea Internal PCI
+ 002d UniNorth 1.5 AGP
+ 002e UniNorth 1.5 PCI
+ 002f UniNorth 1.5 Internal PCI
+ 0030 UniNorth/Pangea FireWire
+106c Hyundai Electronics America
+ 8801 Dual Pentium ISA/PCI Motherboard
+ 8802 PowerPC ISA/PCI Motherboard
+ 8803 Dual Window Graphics Accelerator
+ 8804 LAN Controller
+ 8805 100-BaseT LAN
+106d Sequent Computer Systems
+106e DFI, Inc
+106f City Gate Development Ltd
+1070 Daewoo Telecom Ltd
+1071 Mitac
+1072 GIT Co Ltd
+1073 Yamaha Corporation
+ 0001 3D GUI Accelerator
+ 0002 YGV615 [RPA3 3D-Graphics Controller]
+ 0003 YMF-740
+ 0004 YMF-724
+ 1073 0004 YMF724-Based PCI Audio Adapter
+ 0005 DS1 Audio
+ 1073 0005 DS-XG PCI Audio CODEC
+ 0006 DS1 Audio
+ 0008 DS1 Audio
+ 1073 0008 DS-XG PCI Audio CODEC
+ 000a DS1L Audio
+ 1073 0004 DS-XG PCI Audio CODEC
+ 1073 000a DS-XG PCI Audio CODEC
+ 000c YMF-740C [DS-1L Audio Controller]
+ 107a 000c DS-XG PCI Audio CODEC
+ 000d YMF-724F [DS-1 Audio Controller]
+ 1073 000d DS-XG PCI Audio CODEC
+ 0010 YMF-744B [DS-1S Audio Controller]
+ 1073 0006 DS-XG PCI Audio CODEC
+ 1073 0010 DS-XG PCI Audio CODEC
+ 0012 YMF-754 [DS-1E Audio Controller]
+ 1073 0012 DS-XG PCI Audio Codec
+ 0020 DS-1 Audio
+ 2000 DS2416 Digital Mixing Card
+ 1073 2000 DS2416 Digital Mixing Card
+1074 NexGen Microsystems
+ 4e78 82c500/1
+1075 Advanced Integrations Research
+1076 Chaintech Computer Co. Ltd
+1077 QLogic Corp.
+ 1016 ISP10160 Single Channel Ultra3 SCSI Processor
+ 1020 ISP1020 Fast-wide SCSI
+ 1022 ISP1022 Fast-wide SCSI
+ 1080 ISP1080 SCSI Host Adapter
+ 1216 ISP12160 Dual Channel Ultra3 SCSI Processor
+ 101e 8471 QLA12160 on AMI MegaRAID
+ 101e 8493 QLA12160 on AMI MegaRAID
+ 1240 ISP1240 SCSI Host Adapter
+ 1280 ISP1280
+ 2020 ISP2020A Fast!SCSI Basic Adapter
+ 2100 QLA2100 64-bit Fibre Channel Adapter
+ 1077 0001 QLA2100 64-bit Fibre Channel Adapter
+ 2200 QLA2200
+ 2300 QLA2300 64-bit FC-AL Adapter
+ 2312 QLA2312 Fibre Channel Adapter
+1078 Cyrix Corporation
+ 0000 5510 [Grappa]
+ 0001 PCI Master
+ 0002 5520 [Cognac]
+ 0100 5530 Legacy [Kahlua]
+ 0101 5530 SMI [Kahlua]
+ 0102 5530 IDE [Kahlua]
+ 0103 5530 Audio [Kahlua]
+ 0104 5530 Video [Kahlua]
+ 0400 ZFMicro PCI Bridge
+ 0401 ZFMicro Chipset SMI
+ 0402 ZFMicro Chipset IDE
+ 0403 ZFMicro Expansion Bus
+1079 I-Bus
+107a NetWorth
+107b Gateway 2000
+107c LG Electronics [Lucky Goldstar Co. Ltd]
+107d LeadTek Research Inc.
+ 0000 P86C850
+107e Interphase Corporation
+ 0001 5515 ATM Adapter [Flipper]
+ 0002 100 VG AnyLan Controller
+ 0004 5526 Fibre Channel Host Adapter
+ 0005 x526 Fibre Channel Host Adapter
+ 0008 5525/5575 ATM Adapter (155 Mbit) [Atlantic]
+ 9003 5535-4P-BRI-ST
+ 9007 5535-4P-BRI-U
+ 9008 5535-1P-SR
+ 900c 5535-1P-SR-ST
+ 900e 5535-1P-SR-U
+ 9011 5535-1P-PRI
+ 9013 5535-2P-PRI
+ 9023 5536-4P-BRI-ST
+ 9027 5536-4P-BRI-U
+ 9031 5536-1P-PRI
+ 9033 5536-2P-PRI
+107f Data Technology Corporation
+ 0802 SL82C105
+1080 Contaq Microsystems
+ 0600 82C599
+ c691 Cypress CY82C691
+ c693 82c693
+1081 Supermac Technology
+ 0d47 Radius PCI to NuBus Bridge
+1082 EFA Corporation of America
+1083 Forex Computer Corporation
+ 0001 FR710
+1084 Parador
+1085 Tulip Computers Int.B.V.
+1086 J. Bond Computer Systems
+1087 Cache Computer
+1088 Microcomputer Systems (M) Son
+1089 Data General Corporation
+108a SBS Technologies (formerly Bit3 Computer Corp.)
+ 0001 VME Bridge Model 617
+ 0010 VME Bridge Model 618
+ 0040 dataBLIZZARD
+ 3000 VME Bridge Model 2706
+108c Oakleigh Systems Inc.
+108d Olicom
+ 0001 Token-Ring 16/4 PCI Adapter (3136/3137)
+ 0002 16/4 Token Ring
+ 0004 RapidFire 3139 Token-Ring 16/4 PCI Adapter
+ 108d 0004 OC-3139/3140 RapidFire Token-Ring 16/4 Adapter
+ 0005 GoCard 3250 Token-Ring 16/4 CardBus PC Card
+ 0006 OC-3530 RapidFire Token-Ring 100
+ 0007 RapidFire 3141 Token-Ring 16/4 PCI Fiber Adapter
+ 108d 0007 OC-3141 RapidFire Token-Ring 16/4 Adapter
+ 0008 RapidFire 3540 HSTR 100/16/4 PCI Adapter
+ 108d 0008 OC-3540 RapidFire HSTR 100/16/4 Adapter
+ 0011 OC-2315
+ 0012 OC-2325
+ 0013 OC-2183/2185
+ 0014 OC-2326
+ 0019 OC-2327/2250 10/100 Ethernet Adapter
+ 108d 0016 OC-2327 Rapidfire 10/100 Ethernet Adapter
+ 108d 0017 OC-2250 GoCard 10/100 Ethernet Adapter
+ 0021 OC-6151/6152 [RapidFire ATM 155]
+ 0022 ATM Adapter
+108e Sun Microsystems Computer Corp.
+ 0001 EBUS
+ 1000 EBUS
+ 1001 Happy Meal
+ 1100 RIO EBUS
+ 1101 RIO GEM
+ 1102 RIO 1394
+ 1103 RIO USB
+ 2bad GEM
+ 5000 Simba Advanced PCI Bridge
+ 5043 SunPCI Co-processor
+ 8000 Psycho PCI Bus Module
+ 8001 Schizo PCI Bus Module
+ a000 Ultra IIi
+ a001 Ultra IIe
+108f Systemsoft
+1090 Encore Computer Corporation
+1091 Intergraph Corporation
+ 0020 3D graphics processor
+ 0021 3D graphics processor w/Texturing
+ 0040 3D graphics frame buffer
+ 0041 3D graphics frame buffer
+ 0060 Proprietary bus bridge
+ 00e4 Powerstorm 4D50T
+ 0720 Motion JPEG codec
+1092 Diamond Multimedia Systems
+ 00a0 Speedstar Pro SE
+ 00a8 Speedstar 64
+ 0550 Viper V550
+ 08d4 Supra 2260 Modem
+ 094c SupraExpress 56i Pro
+ 1092 Viper V330
+ 6120 Maximum DVD
+ 8810 Stealth SE
+ 8811 Stealth 64/SE
+ 8880 Stealth
+ 8881 Stealth
+ 88b0 Stealth 64
+ 88b1 Stealth 64
+ 88c0 Stealth 64
+ 88c1 Stealth 64
+ 88d0 Stealth 64
+ 88d1 Stealth 64
+ 88f0 Stealth 64
+ 88f1 Stealth 64
+ 9999 DMD-I0928-1 "Monster sound" sound chip
+1093 National Instruments
+ 0160 PCI-DIO-96
+ 0162 PCI-MIO-16XE-50
+ 1170 PCI-MIO-16XE-10
+ 1180 PCI-MIO-16E-1
+ 1190 PCI-MIO-16E-4
+ 1330 PCI-6031E
+ 1350 PCI-6071E
+ 2a60 PCI-6023E
+ b001 IMAQ-PCI-1408
+ b011 IMAQ-PXI-1408
+ b021 IMAQ-PCI-1424
+ b031 IMAQ-PCI-1413
+ b041 IMAQ-PCI-1407
+ b051 IMAQ-PXI-1407
+ b061 IMAQ-PCI-1411
+ b071 IMAQ-PCI-1422
+ b081 IMAQ-PXI-1422
+ b091 IMAQ-PXI-1411
+ c801 PCI-GPIB
+1094 First International Computers [FIC]
+1095 CMD Technology Inc
+ 0640 PCI0640
+ 0643 PCI0643
+ 0646 PCI0646
+ 0647 PCI0647
+ 0648 PCI0648
+ 0649 PCI0649
+ 0e11 005d Integrated Ultra ATA-100 Dual Channel Controller
+ 0e11 007e Integrated Ultra ATA-100 IDE RAID Controller
+ 101e 0649 AMI MegaRAID IDE 100 Controller
+ 0650 PBC0650A
+ 0670 USB0670
+ 1095 0670 USB0670
+ 0673 USB0673
+ 0680 PCI0680
+1096 Alacron
+1097 Appian Technology
+1098 Quantum Designs (H.K.) Ltd
+ 0001 QD-8500
+ 0002 QD-8580
+1099 Samsung Electronics Co., Ltd
+109a Packard Bell
+109b Gemlight Computer Ltd.
+109c Megachips Corporation
+109d Zida Technologies Ltd.
+109e Brooktree Corporation
+ 0350 Bt848 Video Capture
+ 0351 Bt849A Video capture
+ 0369 Bt878 Video Capture
+ 1002 0001 TV-Wonder
+ 1002 0003 TV-Wonder/VE
+ 036c Bt879(??) Video Capture
+ 13e9 0070 Win/TV (Video Section)
+ 036e Bt878 Video Capture
+ 0070 13eb WinTV/GO
+ 0070 ff01 Viewcast Osprey 200
+ 11bd 001c PCTV Sat (DBC receiver)
+ 127a 0001 Bt878 Mediastream Controller NTSC
+ 127a 0002 Bt878 Mediastream Controller PAL BG
+ 127a 0003 Bt878a Mediastream Controller PAL BG
+ 127a 0048 Bt878/832 Mediastream Controller
+ 144f 3000 MagicTView CPH060 - Video
+ 1461 0004 AVerTV WDM Video Capture
+ 14f1 0001 Bt878 Mediastream Controller NTSC
+ 14f1 0002 Bt878 Mediastream Controller PAL BG
+ 14f1 0003 Bt878a Mediastream Controller PAL BG
+ 14f1 0048 Bt878/832 Mediastream Controller
+ 1851 1850 FlyVideo'98 - Video
+ 1851 1851 FlyVideo II
+ 1852 1852 FlyVideo'98 - Video (with FM Tuner)
+ 036f Bt879 Video Capture
+ 127a 0044 Bt879 Video Capture NTSC
+ 127a 0122 Bt879 Video Capture PAL I
+ 127a 0144 Bt879 Video Capture NTSC
+ 127a 0222 Bt879 Video Capture PAL BG
+ 127a 0244 Bt879a Video Capture NTSC
+ 127a 0322 Bt879 Video Capture NTSC
+ 127a 0422 Bt879 Video Capture NTSC
+ 127a 1122 Bt879 Video Capture PAL I
+ 127a 1222 Bt879 Video Capture PAL BG
+ 127a 1322 Bt879 Video Capture NTSC
+ 127a 1522 Bt879a Video Capture PAL I
+ 127a 1622 Bt879a Video Capture PAL BG
+ 127a 1722 Bt879a Video Capture NTSC
+ 14f1 0044 Bt879 Video Capture NTSC
+ 14f1 0122 Bt879 Video Capture PAL I
+ 14f1 0144 Bt879 Video Capture NTSC
+ 14f1 0222 Bt879 Video Capture PAL BG
+ 14f1 0244 Bt879a Video Capture NTSC
+ 14f1 0322 Bt879 Video Capture NTSC
+ 14f1 0422 Bt879 Video Capture NTSC
+ 14f1 1122 Bt879 Video Capture PAL I
+ 14f1 1222 Bt879 Video Capture PAL BG
+ 14f1 1322 Bt879 Video Capture NTSC
+ 14f1 1522 Bt879a Video Capture PAL I
+ 14f1 1622 Bt879a Video Capture PAL BG
+ 14f1 1722 Bt879a Video Capture NTSC
+ 1851 1850 FlyVideo'98 - Video
+ 1851 1851 FlyVideo II
+ 1852 1852 FlyVideo'98 - Video (with FM Tuner)
+ 0370 Bt880 Video Capture
+ 1851 1850 FlyVideo'98
+ 1851 1851 FlyVideo'98 EZ - video
+ 1852 1852 FlyVideo'98 (with FM Tuner)
+ 0878 Bt878 Audio Capture
+ 0070 13eb WinTV/GO
+ 0070 ff01 Viewcast Osprey 200
+ 1002 0001 TV-Wonder
+ 1002 0003 TV-Wonder/VE
+ 11bd 001c PCTV Sat (DBC receiver)
+ 127a 0001 Bt878 Video Capture (Audio Section)
+ 127a 0002 Bt878 Video Capture (Audio Section)
+ 127a 0003 Bt878 Video Capture (Audio Section)
+ 127a 0048 Bt878 Video Capture (Audio Section)
+ 13e9 0070 Win/TV (Audio Section)
+ 144f 3000 MagicTView CPH060 - Audio
+ 1461 0004 AVerTV WDM Audio Capture
+ 14f1 0001 Bt878 Video Capture (Audio Section)
+ 14f1 0002 Bt878 Video Capture (Audio Section)
+ 14f1 0003 Bt878 Video Capture (Audio Section)
+ 14f1 0048 Bt878 Video Capture (Audio Section)
+ 0879 Bt879 Audio Capture
+ 127a 0044 Bt879 Video Capture (Audio Section)
+ 127a 0122 Bt879 Video Capture (Audio Section)
+ 127a 0144 Bt879 Video Capture (Audio Section)
+ 127a 0222 Bt879 Video Capture (Audio Section)
+ 127a 0244 Bt879 Video Capture (Audio Section)
+ 127a 0322 Bt879 Video Capture (Audio Section)
+ 127a 0422 Bt879 Video Capture (Audio Section)
+ 127a 1122 Bt879 Video Capture (Audio Section)
+ 127a 1222 Bt879 Video Capture (Audio Section)
+ 127a 1322 Bt879 Video Capture (Audio Section)
+ 127a 1522 Bt879 Video Capture (Audio Section)
+ 127a 1622 Bt879 Video Capture (Audio Section)
+ 127a 1722 Bt879 Video Capture (Audio Section)
+ 14f1 0044 Bt879 Video Capture (Audio Section)
+ 14f1 0122 Bt879 Video Capture (Audio Section)
+ 14f1 0144 Bt879 Video Capture (Audio Section)
+ 14f1 0222 Bt879 Video Capture (Audio Section)
+ 14f1 0244 Bt879 Video Capture (Audio Section)
+ 14f1 0322 Bt879 Video Capture (Audio Section)
+ 14f1 0422 Bt879 Video Capture (Audio Section)
+ 14f1 1122 Bt879 Video Capture (Audio Section)
+ 14f1 1222 Bt879 Video Capture (Audio Section)
+ 14f1 1322 Bt879 Video Capture (Audio Section)
+ 14f1 1522 Bt879 Video Capture (Audio Section)
+ 14f1 1622 Bt879 Video Capture (Audio Section)
+ 14f1 1722 Bt879 Video Capture (Audio Section)
+ 0880 Bt880 Audio Capture
+ 2115 BtV 2115 Mediastream controller
+ 2125 BtV 2125 Mediastream controller
+ 2164 BtV 2164
+ 2165 BtV 2165
+ 8230 Bt8230 ATM Segment/Reassembly Ctrlr (SRC)
+ 8472 Bt8472
+ 8474 Bt8474
+109f Trigem Computer Inc.
+10a0 Meidensha Corporation
+10a1 Juko Electronics Ind. Co. Ltd
+10a2 Quantum Corporation
+10a3 Everex Systems Inc
+10a4 Globe Manufacturing Sales
+10a5 Racal Interlan
+10a6 Informtech Industrial Ltd.
+10a7 Benchmarq Microelectronics
+10a8 Sierra Semiconductor
+ 0000 STB Horizon 64
+10a9 Silicon Graphics, Inc.
+ 0001 Crosstalk to PCI Bridge
+ 0002 Linc I/O controller
+ 0003 IOC3 I/O controller
+ 0004 O2 MACE
+ 0005 RAD Audio
+ 0006 HPCEX
+ 0007 RPCEX
+ 0008 DiVO VIP
+ 0009 Alteon Gigabit Ethernet
+ 0010 AMP Video I/O
+ 0011 GRIP
+ 0012 SGH PSHAC GSN
+ 1001 Magic Carpet
+ 1002 Lithium
+ 1003 Dual JPEG 1
+ 1004 Dual JPEG 2
+ 1005 Dual JPEG 3
+ 1006 Dual JPEG 4
+ 1007 Dual JPEG 5
+ 1008 Cesium
+ 2001 Fibre Channel
+ 2002 ASDE
+ 8001 O2 1394
+ 8002 G-net NT
+10aa ACC Microelectronics
+ 0000 ACCM 2188
+10ab Digicom
+10ac Honeywell IAC
+10ad Symphony Labs
+ 0001 W83769F
+ 0003 SL82C103
+ 0005 SL82C105
+ 0103 SL82c103
+ 0105 SL82c105
+ 0565 W83C553
+10ae Cornerstone Technology
+10af Micro Computer Systems Inc
+10b0 CardExpert Technology
+10b1 Cabletron Systems Inc
+10b2 Raytheon Company
+10b3 Databook Inc
+ 3106 DB87144
+ b106 DB87144
+10b4 STB Systems Inc
+ 1b1d Velocity 128 3D
+ 10b4 237e Velocity 4400
+10b5 PLX Technology, Inc.
+ 0001 i960 PCI bus interface
+ 1076 VScom 800 8 port serial adaptor
+ 1077 VScom 400 4 port serial adaptor
+ 1078 VScom 210 2 port serial and 1 port parallel adaptor
+ 1103 VScom 200 2 port serial adaptor
+ 1146 VScom 010 1 port parallel adaptor
+ 1147 VScom 020 2 port parallel adaptor
+ 2724 Thales PCSM Security Card
+ 9030 PCI <-> IOBus Bridge Hot Swap
+ 15ed 1002 MCCS 8-port Serial Hot Swap
+ 15ed 1003 MCCS 16-port Serial Hot Swap
+ 9036 9036
+ 9050 PCI <-> IOBus Bridge
+ 10b5 2273 SH-ARC SoHard ARCnet card
+ 1522 0001 RockForce 4 Port V.90 Data/Fax/Voice Modem
+ 1522 0002 RockForce 2 Port V.90 Data/Fax/Voice Modem
+ 1522 0003 RockForce 6 Port V.90 Data/Fax/Voice Modem
+ 1522 0004 RockForce 8 Port V.90 Data/Fax/Voice Modem
+ 1522 0010 RockForce2000 4 Port V.90 Data/Fax/Voice Modem
+ 1522 0020 RockForce2000 2 Port V.90 Data/Fax/Voice Modem
+ 15ed 1000 Macrolink MCCS 8-port Serial
+ 15ed 1001 Macrolink MCCS 16-port Serial
+ 15ed 1002 Macrolink MCCS 8-port Serial Hot Swap
+ 15ed 1003 Macrolink MCCS 16-port Serial Hot Swap
+ d531 c002 PCIntelliCAN 2xSJA1000 CAN bus
+ d84d 4006 EX-4006 1P
+ d84d 4008 EX-4008 1P EPP/ECP
+ d84d 4014 EX-4014 2P
+ d84d 4018 EX-4018 3P EPP/ECP
+ d84d 4025 EX-4025 1S(16C550) RS-232
+ d84d 4027 EX-4027 1S(16C650) RS-232
+ d84d 4028 EX-4028 1S(16C850) RS-232
+ d84d 4036 EX-4036 2S(16C650) RS-232
+ d84d 4037 EX-4037 2S(16C650) RS-232
+ d84d 4038 EX-4038 2S(16C850) RS-232
+ d84d 4052 EX-4052 1S(16C550) RS-422/485
+ d84d 4053 EX-4053 2S(16C550) RS-422/485
+ d84d 4055 EX-4055 4S(16C550) RS-232
+ d84d 4058 EX-4058 4S(16C650) RS-232
+ d84d 4065 EX-4065 8S(16C550) RS-232
+ d84d 4068 EX-4068 8S(16C650) RS-232
+ d84d 4078 EX-4078 2S(16C552) RS-232+1P
+ 9054 PCI <-> IOBus Bridge
+ 10b5 2455 Wessex Technology PHIL-PCI
+ 9060 9060
+ 906d 9060SD
+ 125c 0640 Aries 16000P
+ 906e 9060ES
+ 9080 9080
+ 10b5 9080 9080 [real subsystem ID not set]
+ 129d 0002 Aculab PCI Prosody card
+ a001 GTEK Jetport II 2 port serial adaptor
+ c001 GTEK Cyclone 16/32 port serial adaptor
+10b6 Madge Networks
+ 0001 Smart 16/4 PCI Ringnode
+ 0002 Smart 16/4 PCI Ringnode Mk2
+ 10b6 0002 Smart 16/4 PCI Ringnode Mk2
+ 10b6 0006 16/4 CardBus Adapter
+ 0003 Smart 16/4 PCI Ringnode Mk3
+ 0e11 b0fd Compaq NC4621 PCI, 4/16, WOL
+ 10b6 0003 Smart 16/4 PCI Ringnode Mk3
+ 10b6 0007 Presto PCI Plus Adapter
+ 0004 Smart 16/4 PCI Ringnode Mk1
+ 0006 16/4 Cardbus Adapter
+ 10b6 0006 16/4 CardBus Adapter
+ 0007 Presto PCI Adapter
+ 10b6 0007 Presto PCI
+ 0009 Smart 100/16/4 PCI-HS Ringnode
+ 10b6 0009 Smart 100/16/4 PCI-HS Ringnode
+ 000a Smart 100/16/4 PCI Ringnode
+ 10b6 000a Smart 100/16/4 PCI Ringnode
+ 000b 16/4 CardBus Adapter Mk2
+ 10b6 0008 16/4 CardBus Adapter Mk2
+ 10b6 000b 16/4 Cardbus Adapter Mk2
+ 000c RapidFire 3140V2 16/4 TR Adapter
+ 10b6 000c RapidFire 3140V2 16/4 TR Adapter
+ 1000 Collage 25/155 ATM Client Adapter
+ 1001 Collage 155 ATM Server Adapter
+10b7 3Com Corporation
+ 0001 3c985 1000BaseSX (SX/TX)
+ 1006 MINI PCI type 3B Data Fax Modem
+ 1007 Mini PCI 56k Winmodem
+ 10b7 615c Mini PCI 56K Modem
+ 3390 3c339 TokenLink Velocity
+ 3590 3c359 TokenLink Velocity XL
+ 10b7 3590 TokenLink Velocity XL Adapter (3C359/359B)
+ 4500 3c450 Cyclone/unknown
+ 5055 3c555 Laptop Hurricane
+ 5057 3c575 [Megahertz] 10/100 LAN CardBus
+ 10b7 5a57 3C575 Megahertz 10/100 LAN Cardbus PC Card
+ 5157 3c575 [Megahertz] 10/100 LAN CardBus
+ 10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card
+ 5257 3CCFE575CT Cyclone CardBus
+ 10b7 5c57 FE575C-3Com 10/100 LAN CardBus-Fast Ethernet
+ 5900 3c590 10BaseT [Vortex]
+ 5920 3c592 EISA 10mbps Demon/Vortex
+ 5950 3c595 100BaseTX [Vortex]
+ 5951 3c595 100BaseT4 [Vortex]
+ 5952 3c595 100Base-MII [Vortex]
+ 5970 3c597 EISA Fast Demon/Vortex
+ 5b57 3c595 [Megahertz] 10/100 LAN CardBus
+ 10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card
+ 6055 3c556 Hurricane CardBus
+ 6056 3c556B Hurricane CardBus
+ 10b7 6556 10/100 Mini PCI Ethernet Adapter
+ 6560 3CCFE656 Cyclone CardBus
+ 10b7 656a 3CCFEM656 10/100 LAN+56K Modem CardBus
+ 6561 3CCFEM656 10/100 LAN+56K Modem CardBus
+ 10b7 656b 3CCFEM656 10/100 LAN+56K Modem CardBus
+ 6562 3CCFEM656 [id 6562] Cyclone CardBus
+ 10b7 656b 3CCFEM656B 10/100 LAN+56K Modem CardBus
+ 6563 3CCFEM656B 10/100 LAN+56K Modem CardBus
+ 10b7 656b 3CCFEM656 10/100 LAN+56K Modem CardBus
+ 6564 3CCFEM656 [id 6564] Cyclone CardBus
+ 7646 3cSOHO100-TX Hurricane
+ 7940 3c803 FDDILink UTP Controller
+ 7980 3c804 FDDILink SAS Controller
+ 7990 3c805 FDDILink DAS Controller
+ 8811 Token ring
+ 9000 3c900 10BaseT [Boomerang]
+ 9001 3c900 Combo [Boomerang]
+ 9004 3c900B-TPO [Etherlink XL TPO]
+ 10b7 9004 3C900B-TPO Etherlink XL TPO 10Mb
+ 9005 3c900B-Combo [Etherlink XL Combo]
+ 10b7 9005 3C900B-Combo Etherlink XL Combo
+ 9006 3c900B-TPC [Etherlink XL TPC]
+ 900a 3c900B-FL [Etherlink XL FL]
+ 9050 3c905 100BaseTX [Boomerang]
+ 9051 3c905 100BaseT4 [Boomerang]
+ 9055 3c905B 100BaseTX [Cyclone]
+ 1028 0080 3C905B Fast Etherlink XL 10/100
+ 1028 0081 3C905B Fast Etherlink XL 10/100
+ 1028 0082 3C905B Fast Etherlink XL 10/100
+ 1028 0083 3C905B Fast Etherlink XL 10/100
+ 1028 0084 3C905B Fast Etherlink XL 10/100
+ 1028 0085 3C905B Fast Etherlink XL 10/100
+ 1028 0086 3C905B Fast Etherlink XL 10/100
+ 1028 0087 3C905B Fast Etherlink XL 10/100
+ 1028 0088 3C905B Fast Etherlink XL 10/100
+ 1028 0089 3C905B Fast Etherlink XL 10/100
+ 1028 0090 3C905B Fast Etherlink XL 10/100
+ 1028 0091 3C905B Fast Etherlink XL 10/100
+ 1028 0092 3C905B Fast Etherlink XL 10/100
+ 1028 0093 3C905B Fast Etherlink XL 10/100
+ 1028 0094 3C905B Fast Etherlink XL 10/100
+ 1028 0095 3C905B Fast Etherlink XL 10/100
+ 1028 0096 3C905B Fast Etherlink XL 10/100
+ 1028 0097 3C905B Fast Etherlink XL 10/100
+ 1028 0098 3C905B Fast Etherlink XL 10/100
+ 1028 0099 3C905B Fast Etherlink XL 10/100
+ 10b7 9055 3C905B Fast Etherlink XL 10/100
+ 9056 3c905B-T4 [Fast EtherLink XL 10/100]
+ 9058 3c905B-Combo [Deluxe Etherlink XL 10/100]
+ 905a 3c905B-FX [Fast Etherlink XL FX 10/100]
+ 9200 3c905C-TX/TX-M [Tornado]
+ 1028 0095 Integrated 3C905C-TX Fast Etherlink for PC Management NIC
+ 10b7 1000 3C905C-TX Fast Etherlink for PC Management NIC
+ 10b7 7000 10/100 Mini PCI Ethernet Adapter
+ 9800 3c980-TX [Fast Etherlink XL Server Adapter]
+ 10b7 9800 3c980-TX Fast Etherlink XL Server Adapter
+ 9805 3c980-TX 10/100baseTX NIC [Python-T]
+ 10b7 1201 3c982-TXM 10/100baseTX Dual Port A [Hydra]
+ 10b7 1202 3c982-TXM 10/100baseTX Dual Port B [Hydra]
+ 10b7 9805 3c980 10/100baseTX NIC [Python-T]
+ 9900 3C990-TX Typhoon
+ 9902 3CR990-TX-95 56-bit Typhoon Client
+ 9903 3CR990-TX-97 168-bit Typhoon Client
+ 9904 3C990B-TX-M/3C990BSVR [Typhoon2]
+ 9905 3CR990-FX-95/97/95 [Typhoon Fiber]
+ 9908 3CR990SVR95 56-bit Typhoon Server
+ 9909 3CR990SVR97 Typhoon Server
+ 990b 3C990SVR [Typhoon Server]
+10b8 Standard Microsystems Corp [SMC]
+ 0005 83C170QF
+ 1055 e000 LANEPIC 10/100 [EVB171Q-PCI]
+ 1055 e002 LANEPIC 10/100 [EVB171G-PCI]
+ 10b8 a011 EtherPower II 10/100
+ 10b8 a014 EtherPower II 10/100
+ 10b8 a015 EtherPower II 10/100
+ 10b8 a016 EtherPower II 10/100
+ 10b8 a017 EtherPower II 10/100
+ 0006 LANEPIC
+ 1055 e100 LANEPIC Cardbus Fast Ethernet Adapter
+ 1055 e102 LANEPIC Cardbus Fast Ethernet Adapter
+ 1055 e300 LANEPIC Cardbus Fast Ethernet Adapter
+ 1055 e302 LANEPIC Cardbus Fast Ethernet Adapter
+ 10b8 a012 LANEPIC Cardbus Fast Ethernet Adapter
+ 13a2 8002 LANEPIC Cardbus Fast Ethernet Adapter
+ 13a2 8006 LANEPIC Cardbus Fast Ethernet Adapter
+ 1000 FDC 37c665
+ 1001 FDC 37C922
+ a011 83C170QF
+ b106 SMC34C90
+10b9 Acer Laboratories Inc. [ALi]
+ 0111 C-Media CMI8738/C3DX Audio Device (OEM)
+ 10b9 0111 C-Media CMI8738/C3DX Audio Device (OEM)
+ 1435 M1435
+ 1445 M1445
+ 1449 M1449
+ 1451 M1451
+ 1461 M1461
+ 1489 M1489
+ 1511 M1511 [Aladdin]
+ 1512 M1512 [Aladdin]
+ 1513 M1513 [Aladdin]
+ 1521 M1521 [Aladdin III]
+ 10b9 1521 ALI M1521 Aladdin III CPU Bridge
+ 1523 M1523
+ 10b9 1523 ALI M1523 ISA Bridge
+ 1531 M1531 [Aladdin IV]
+ 1533 M1533 PCI to ISA Bridge [Aladdin IV]
+ 10b9 1533 ALI M1533 Aladdin IV ISA Bridge
+ 1541 M1541
+ 10b9 1541 ALI M1541 Aladdin V/V+ AGP System Controller
+ 1543 M1543
+ 1621 M1621
+ 1631 ALI M1631 PCI North Bridge Aladdin Pro III
+ 1632 M1632M Northbridge+Trident
+ 1641 ALI M1641 PCI North Bridge Aladdin Pro IV
+ 1644 M1644/M1644T Northbridge+Trident
+ 1646 M1646 Northbridge+Trident
+ 1647 M1647 Northbridge [MAGiK 1 / MobileMAGiK 1]
+ 1651 M1651/M1651T Northbridge [Aladdin-Pro 5/5M,Aladdin-Pro 5T/5TM]
+ 1671 M1671 Northbridge [Aladdin-P4]
+ 3141 M3141
+ 3143 M3143
+ 3145 M3145
+ 3147 M3147
+ 3149 M3149
+ 3151 M3151
+ 3307 M3307
+ 3309 M3309
+ 5212 M4803
+ 5215 MS4803
+ 5217 M5217H
+ 5219 M5219
+ 5225 M5225
+ 5229 M5229 IDE
+ 1043 8053 A7A266 Motherboard IDE
+ 5235 M5225
+ 5237 USB 1.1 Controller
+ 5239 USB 2.0 Controller
+ 5243 M1541 PCI to AGP Controller
+ 5247 PCI to AGP Controller
+ 5251 M5251 P1394 OHCI 1.0 Controller
+ 5253 M5253 P1394 OHCI 1.1 Controller
+ 5261 M5261 Ethernet Controller
+ 5451 M5451 PCI AC-Link Controller Audio Device
+ 5453 M5453 PCI AC-Link Controller Modem Device
+ 5455 M5455 PCI AC-Link Controller Audio Device
+ 5457 M5457 AC-Link Modem Interface Controller
+ 5471 M5471 Memory Stick Controller
+ 5473 M5473 SD-MMC Controller
+ 7101 M7101 PMU
+ 10b9 7101 ALI M7101 Power Management Controller
+10ba Mitsubishi Electric Corp.
+ 0301 AccelGraphics AccelECLIPSE
+10bb Dapha Electronics Corporation
+10bc Advanced Logic Research
+10bd Surecom Technology
+ 0e34 NE-34
+10be Tseng Labs International Co.
+10bf Most Inc
+10c0 Boca Research Inc.
+10c1 ICM Co., Ltd.
+10c2 Auspex Systems Inc.
+10c3 Samsung Semiconductors, Inc.
+ 1100 Smartether100 SC1100 LAN Adapter (i82557B)
+10c4 Award Software International Inc.
+10c5 Xerox Corporation
+10c6 Rambus Inc.
+10c7 Media Vision
+10c8 Neomagic Corporation
+ 0001 NM2070 [MagicGraph NM2070]
+ 0002 NM2090 [MagicGraph 128V]
+ 0003 NM2093 [MagicGraph 128ZV]
+ 0004 NM2160 [MagicGraph 128XD]
+ 1014 00ba MagicGraph 128XD
+ 1025 1007 MagicGraph 128XD
+ 1028 0074 MagicGraph 128XD
+ 1028 0075 MagicGraph 128XD
+ 1028 007d MagicGraph 128XD
+ 1028 007e MagicGraph 128XD
+ 1033 802f MagicGraph 128XD
+ 104d 801b MagicGraph 128XD
+ 104d 802f MagicGraph 128XD
+ 104d 830b MagicGraph 128XD
+ 10ba 0e00 MagicGraph 128XD
+ 10c8 0004 MagicGraph 128XD
+ 10cf 1029 MagicGraph 128XD
+ 10f7 8308 MagicGraph 128XD
+ 10f7 8309 MagicGraph 128XD
+ 10f7 830b MagicGraph 128XD
+ 10f7 830d MagicGraph 128XD
+ 10f7 8312 MagicGraph 128XD
+ 0005 [MagicMedia 256AV]
+ 0006 NM2360 [MagicMedia 256ZX]
+ 0016 NM2380 [MagicMedia 256XL+]
+ 10c8 0016 MagicMedia 256XL+
+ 0025 [MagicMedia 256AV+]
+ 0083 [MagicGraph 128ZV Plus]
+ 8005 [MagicMedia 256AV Audio]
+ 0e11 b0d1 MagicMedia 256AV Audio Device on Discovery
+ 0e11 b126 MagicMedia 256AV Audio Device on Durango
+ 1014 00dd MagicMedia 256AV Audio Device on BlackTip Thinkpad
+ 1025 1003 MagicMedia 256AV Audio Device on TravelMate 720
+ 1028 008f MagicMedia 256AV Audio Device on Colorado Inspiron
+ 103c 0007 MagicMedia 256AV Audio Device on Voyager II
+ 103c 0008 MagicMedia 256AV Audio Device on Voyager III
+ 103c 000d MagicMedia 256AV Audio Device on Omnibook 900
+ 10c8 8005 MagicMedia 256AV Audio Device on FireAnt
+ 110a 8005 MagicMedia 256AV Audio Device
+ 14c0 0004 MagicMedia 256AV Audio Device
+ 8006 NM2360 [MagicMedia 256ZX Audio]
+ 8016 NM2360 [MagicMedia 256ZX Audio]
+10c9 Dataexpert Corporation
+10ca Fujitsu Microelectr., Inc.
+10cb Omron Corporation
+10cc Mentor ARC Inc
+10cd Advanced System Products, Inc
+ 1100 ASC1100
+ 1200 ASC1200 [(abp940) Fast SCSI-II]
+ 1300 ABP940-U / ABP960-U
+ 10cd 1310 ASC1300 SCSI Adapter
+ 2300 ABP940-UW
+ 2500 ABP940-U2W
+10ce Radius
+10cf Citicorp TTI
+ 2001 mb86605
+10d0 Fujitsu Limited
+10d1 FuturePlus Systems Corp.
+10d2 Molex Incorporated
+10d3 Jabil Circuit Inc
+10d4 Hualon Microelectronics
+10d5 Autologic Inc.
+10d6 Cetia
+10d7 BCM Advanced Research
+10d8 Advanced Peripherals Labs
+10d9 Macronix, Inc. [MXIC]
+ 0512 MX98713
+ 0531 MX987x5
+ 1186 1200 DFE-540TX ProFAST 10/100 Adapter
+ 8625 MX86250
+ 8888 MX86200
+10da Compaq IPG-Austin
+ 0508 TC4048 Token Ring 4/16
+ 3390 Tl3c3x9
+10db Rohm LSI Systems, Inc.
+10dc CERN/ECP/EDU
+ 0001 STAR/RD24 SCI-PCI (PMC)
+ 0002 TAR/RD24 SCI-PCI (PMC)
+ 0021 HIPPI destination
+ 0022 HIPPI source
+ 10dc ATT2C15-3 FPGA
+10dd Evans & Sutherland
+10de nVidia Corporation
+ 0008 NV1 [EDGE 3D]
+ 0009 NV1 [EDGE 3D]
+ 0010 NV2 [Mutara V08]
+ 0020 NV4 [Riva TnT]
+ 1043 0200 V3400 TNT
+ 1048 0c18 Erazor II SGRAM
+ 1048 0c1b Erazor II
+ 1092 0550 Viper V550
+ 1092 0552 Viper V550
+ 1092 4804 Viper V550
+ 1092 4808 Viper V550
+ 1092 4810 Viper V550
+ 1092 4812 Viper V550
+ 1092 4815 Viper V550
+ 1092 4820 Viper V550 with TV out
+ 1092 4822 Viper V550
+ 1092 4904 Viper V550
+ 1092 4914 Viper V550
+ 1092 8225 Viper V550
+ 10b4 273d Velocity 4400
+ 10b4 2740 Velocity 4400
+ 10de 0020 Riva TNT
+ 1102 1015 Graphics Blaster CT6710
+ 1102 1016 Graphics Blaster RIVA TNT
+ 0028 NV5 [Riva TnT2]
+ 1043 0200 AGP-V3800 SGRAM
+ 1043 0201 AGP-V3800 SDRAM
+ 1043 0205 PCI-V3800
+ 1043 4000 AGP-V3800PRO
+ 1092 4804 Viper V770
+ 1092 4a00 Viper V770
+ 1092 4a02 Viper V770 Ultra
+ 1092 5a00 RIVA TNT2/TNT2 Pro
+ 1092 6a02 Viper V770 Ultra
+ 1092 7a02 Viper V770 Ultra
+ 10de 0005 RIVA TNT2 Pro
+ 10de 000f Compaq NVIDIA TNT2 Pro
+ 1102 1020 3D Blaster RIVA TNT2
+ 1102 1026 3D Blaster RIVA TNT2 Digital
+ 14af 5810 Maxi Gamer Xentor
+ 0029 NV5 [Riva TnT2 Ultra]
+ 1043 0200 AGP-V3800 Deluxe
+ 1043 0201 AGP-V3800 Ultra SDRAM
+ 1043 0205 PCI-V3800 Ultra
+ 1102 1021 3D Blaster RIVA TNT2 Ultra
+ 1102 1029 3D Blaster RIVA TNT2 Ultra
+ 1102 102f 3D Blaster RIVA TNT2 Ultra
+ 14af 5820 Maxi Gamer Xentor 32
+ 002a NV5 [Riva TnT2]
+ 002b NV5 [Riva TnT2]
+ 002c NV6 [Vanta]
+ 1043 0200 AGP-V3800 Combat SDRAM
+ 1043 0201 AGP-V3800 Combat
+ 1092 6820 Viper V730
+ 1102 1031 CT6938 VANTA 8MB
+ 1102 1034 CT6894 VANTA 16MB
+ 14af 5008 Maxi Gamer Phoenix 2
+ 002d RIVA TNT2 Model 64
+ 1043 0200 AGP-V3800M
+ 1043 0201 AGP-V3800M
+ 1102 1023 CT6892 RIVA TNT2 Value
+ 1102 1024 CT6932 RIVA TNT2 Value 32Mb
+ 1102 102c CT6931 RIVA TNT2 Value [Jumper]
+ 1462 8808 MSI-8808
+ 1554 1041 PixelView RIVA TNT2 M64 32MB
+ 002e NV6 [Vanta]
+ 002f NV6 [Vanta]
+ 00a0 NV5 [Riva TNT2]
+ 14af 5810 Maxi Gamer Xentor
+ 0100 NV10 [GeForce 256 SDR]
+ 1043 0200 AGP-V6600 SGRAM
+ 1043 0201 AGP-V6600 SDRAM
+ 1043 4008 AGP-V6600 SGRAM
+ 1043 4009 AGP-V6600 SDRAM
+ 1102 102d CT6941 GeForce 256
+ 14af 5022 3D Prophet SE
+ 0101 NV10 [GeForce 256 DDR]
+ 1043 0202 AGP-V6800 DDR
+ 1043 400a AGP-V6800 DDR SGRAM
+ 1043 400b AGP-V6800 DDR SDRAM
+ 1102 102e CT6971 GeForce 256 DDR
+ 14af 5021 3D Prophet DDR-DVI
+ 0103 NV10 [Quadro]
+ 0110 NV11 [GeForce2 MX]
+ 1043 4015 AGP-V7100 Pro
+ 1043 4031 V7100 Pro with TV output
+ 14af 7103 3D Prophet II MX Dual-Display
+ 0111 NV11 [GeForce2 MX DDR]
+ 0112 NV11 [GeForce2 Go]
+ 0113 NV11 [GeForce2 MXR]
+ 0150 NV15 [GeForce2 GTS]
+ 1043 4016 V7700 AGP Video Card
+ 107d 2840 WinFast GeForce2 GTS with TV output
+ 1462 8831 Creative GeForce2 Pro
+ 0151 NV15 [GeForce2 Ti]
+ 0152 NV15 [GeForce2 Ultra, Bladerunner]
+ 1048 0c56 GLADIAC Ultra
+ 0153 NV15 [Quadro2 Pro]
+ 0170 NV17 [GeForce4 MX460]
+ 0171 NV17 [GeForce4 MX440]
+ 0172 NV17 [GeForce4 MX420]
+ 0173 NV1x
+ 0174 NV17 [GeForce4 440 Go]
+ 0175 NV17 [GeForce4 420 Go]
+ 0176 NV17 [GeForce4 420 Go 32M]
+ 0178 Quadro4 500XGL
+ 0179 NV17 [GeForce4 440 Go 64M]
+ 017a Quadro4 200/400NVS
+ 017b Quadro4 550XGL
+ 017c Quadro4 550 GoGL
+ 01a0 NV15 [GeForce2 - nForce GPU]
+ 01a4 nForce CPU bridge
+ 01ab nForce 420 Memory Controller (DDR)
+ 01ac nForce 220/420 Memory Controller
+ 01ad nForce 220/420 Memory Controller
+ 01b1 nForce Audio
+ 01b2 nForce ISA Bridge
+ 01b4 nForce PCI System Management
+ 01b7 nForce AGP to PCI Bridge
+ 01b8 nForce PCI-to-PCI bridge
+ 01bc nForce IDE
+ 0200 NV20 [GeForce3]
+ 1043 402f AGP-V8200 DDR
+ 0201 NV20 [GeForce3 Ti200]
+ 0202 NV20 [GeForce3 Ti500]
+ 1043 405b V8200 T5
+ 0203 NV20 [Quadro DCC]
+ 0250 NV25 [GeForce4 Ti4600]
+ 0251 NV25 [GeForce4 Ti4400]
+ 0253 NV25 [GeForce4 Ti4200]
+ 0258 Quadro4 900XGL
+ 0259 Quadro4 750XGL
+ 025b Quadro4 700XGL
+10df Emulex Corporation
+ 10df Light Pulse Fibre Channel Adapter
+ 1ae5 LP6000 Fibre Channel Host Adapter
+ f700 LP7000 Fibre Channel Host Adapter
+ f800 LP8000 Fibre Channel Host Adapter
+ f900 LP9000 Fibre Channel Host Adapter
+10e0 Integrated Micro Solutions Inc.
+ 5026 IMS5026/27/28
+ 5027 IMS5027
+ 5028 IMS5028
+ 8849 IMS8849
+ 8853 IMS8853
+ 9128 IMS9129 [Twin turbo 128]
+10e1 Tekram Technology Co.,Ltd.
+ 0391 TRM-S1040
+ 10e1 0391 DC-315U SCSI-3 Host Adapter
+ 690c DC-690c
+ dc29 DC-290
+10e2 Aptix Corporation
+10e3 Tundra Semiconductor Corp.
+ 0000 CA91C042 [Universe]
+ 0860 CA91C860 [QSpan]
+10e4 Tandem Computers
+10e5 Micro Industries Corporation
+10e6 Gainbery Computer Products Inc.
+10e7 Vadem
+10e8 Applied Micro Circuits Corp.
+ 2011 Q-Motion Video Capture/Edit board
+ 4750 S5930 [Matchmaker]
+ 5920 S5920
+ 8043 LANai4.x [Myrinet LANai interface chip]
+ 8062 S5933_PARASTATION
+ 807d S5933 [Matchmaker]
+ 8088 Kongsberg Spacetec Format Synchronizer
+ 8089 Kongsberg Spacetec Serial Output Board
+ 809c S5933_HEPC3
+ 80d7 PCI-9112
+ 80d9 PCI-9118
+ 80da PCI-9812
+ 811a PCI-IEEE1355-DS-DE Interface
+ 8170 S5933 [Matchmaker] (Chipset Development Tool)
+ 82db AJA HDNTV HD SDI Framestore
+10e9 Alps Electric Co., Ltd.
+10ea Intergraphics Systems
+ 1680 IGA-1680
+ 1682 IGA-1682
+ 1683 IGA-1683
+ 2000 CyberPro 2000
+ 2010 CyberPro 2000A
+ 5000 CyberPro 5000
+ 5050 CyberPro 5050
+10eb Artists Graphics
+ 0101 3GA
+ 8111 Twist3 Frame Grabber
+10ec Realtek Semiconductor Co., Ltd.
+ 8029 RTL-8029(AS)
+ 10b8 2011 EZ-Card (SMC1208)
+ 10ec 8029 RTL-8029(AS)
+ 1113 1208 EN1208
+ 1186 0300 DE-528
+ 1259 2400 AT-2400
+ 8129 RTL-8129
+ 10ec 8129 RT8129 Fast Ethernet Adapter
+ 8138 RT8139 (B/C) Cardbus Fast Ethernet Adapter
+ 10ec 8138 RT8139 (B/C) Fast Ethernet Adapter
+ 8139 RTL-8139/8139C/8139C+
+ 1025 8920 ALN-325
+ 1025 8921 ALN-325
+ 10bd 0320 EP-320X-R
+ 10ec 8139 RT8139
+ 1186 1300 DFE-538TX
+ 1186 1320 SN5200
+ 1186 8139 DRN-32TX
+ 1259 2500 AT-2500TX
+ 1259 2503 AT-2500TX/ACPI
+ 1429 d010 ND010
+ 1432 9130 EN-9130TX
+ 1436 8139 RT8139
+ 146c 1439 FE-1439TX
+ 1489 6001 GF100TXRII
+ 1489 6002 GF100TXRA
+ 149c 139a LFE-8139ATX
+ 149c 8139 LFE-8139TX
+ 2646 0001 EtheRx
+ 8e2e 7000 KF-230TX
+ 8e2e 7100 KF-230TX/2
+ a0a0 0007 ALN-325C
+ 8169 RTL-8169
+10ed Ascii Corporation
+ 7310 V7310
+10ee Xilinx, Inc.
+ 3fc0 RME Digi96
+ 3fc1 RME Digi96/8
+ 3fc2 RME Digi96/8 Pro
+ 3fc3 RME Digi96/8 Pad
+ 3fc4 RME Digi9652 (Hammerfall)
+ 3fc5 RME Hammerfall DSP
+10ef Racore Computer Products, Inc.
+ 8154 M815x Token Ring Adapter
+10f0 Peritek Corporation
+10f1 Tyan Computer
+10f2 Achme Computer, Inc.
+10f3 Alaris, Inc.
+10f4 S-MOS Systems, Inc.
+10f5 NKK Corporation
+ a001 NDR4000 [NR4600 Bridge]
+10f6 Creative Electronic Systems SA
+10f7 Matsushita Electric Industrial Co., Ltd.
+10f8 Altos India Ltd
+10f9 PC Direct
+10fa Truevision
+ 000c TARGA 1000
+10fb Thesys Gesellschaft für Mikroelektronik mbH
+10fc I-O Data Device, Inc.
+# What's in the cardbus end of a Sony ACR-A01 card; comes with newer Vaio CD-RW drives
+ 0003 Cardbus IDE Controller
+ 0005 Cardbus SCSI CBSC II
+10fd Soyo Computer, Inc
+10fe Fast Multimedia AG
+10ff NCube
+1100 Jazz Multimedia
+1101 Initio Corporation
+ 1060 INI-A100U2W
+ 9100 INI-9100/9100W
+ 9400 INI-940
+ 9401 INI-950
+ 9500 360P
+1102 Creative Labs
+ 0002 SB Live! EMU10k1
+ 1102 0020 CT4850 SBLive! Value
+ 1102 0021 CT4620 SBLive!
+ 1102 002f SBLive! mainboard implementation
+ 1102 4001 E-mu APS
+ 1102 8022 CT4780 SBLive! Value
+ 1102 8023 CT4790 SoundBlaster PCI512
+ 1102 8024 CT4760 SBLive!
+ 1102 8025 SBLive! Mainboard Implementation
+ 1102 8026 CT4830 SBLive! Value
+ 1102 8027 CT4832 SBLive! Value
+ 1102 8028 CT4760 SBLive! OEM version
+ 1102 8031 CT4831 SBLive! Value
+ 1102 8040 CT4760 SBLive!
+ 1102 8051 CT4850 SBLive! Value
+ 1102 8061 SBLive! Player 5.1
+ 0004 SB Audigy
+ 1102 0051 SB0090 Audigy Player
+ 4001 SB Audigy FireWire Port
+ 7002 SB Live! MIDI/Game Port
+ 1102 0020 Gameport Joystick
+ 7003 SB Audigy MIDI/Game port
+ 1102 0040 SB Audigy MIDI/Gameport
+ 8938 ES1371
+1103 Triones Technologies, Inc.
+ 0003 HPT343
+# Revisions: 01=HPT366, 03=HPT370, 04=HPT370A, 05=HPT372
+ 0004 HPT366/368/370/370A/372
+ 1103 0001 HPT370A
+ 1103 0005 HPT370 UDMA100
+ 0005 HPT372A
+ 0006 HPT302
+ 0007 HPT371
+ 0008 HPT374
+1104 RasterOps Corp.
+1105 Sigma Designs, Inc.
+ 1105 REALmagic Xcard MPEG 1/2/3/4 DVD Decoder
+ 8300 REALmagic Hollywood Plus DVD Decoder
+ 8400 EM840x REALmagic DVD/MPEG-2 Audio/Video Decoder
+1106 VIA Technologies, Inc.
+ 0130 VT6305 1394.A Controller
+ 0305 VT8363/8365 [KT133/KM133]
+ 1043 8033 A7V Mainboard
+ 1043 8042 A7V133/A7V133-C Mainboard
+ 147b a401 KT7/KT7-RAID/KT7A/KT7A-RAID Mainboard
+ 0391 VT8371 [KX133]
+ 0501 VT8501 [Apollo MVP4]
+ 0505 VT82C505
+ 0561 VT82C561
+ 0571 VT82C586B PIPC Bus Master IDE
+ 1458 5002 GA-7VAX Mainboard
+ 0576 VT82C576 3V [Apollo Master]
+ 0585 VT82C585VP [Apollo VP1/VPX]
+ 0586 VT82C586/A/B PCI-to-ISA [Apollo VP]
+ 1106 0000 MVP3 ISA Bridge
+ 0595 VT82C595 [Apollo VP2]
+ 0596 VT82C596 ISA [Mobile South]
+ 1106 0000 VT82C596/A/B PCI to ISA Bridge
+ 1458 0596 VT82C596/A/B PCI to ISA Bridge
+ 0597 VT82C597 [Apollo VP3]
+ 0598 VT82C598 [Apollo MVP3]
+ 0601 VT8601 [Apollo ProMedia]
+ 0605 VT8605 [ProSavage PM133]
+ 0680 VT82C680 [Apollo P6]
+ 0686 VT82C686 [Apollo Super South]
+ 1043 8033 A7V Mainboard
+ 1043 8042 A7V133/A7V133-C Mainboard
+ 1106 0000 VT82C686/A PCI to ISA Bridge
+ 1106 0686 VT82C686/A PCI to ISA Bridge
+ 0691 VT82C693A/694x [Apollo PRO133x]
+ 1458 0691 VT82C691 Apollo Pro System Controller
+ 0693 VT82C693 [Apollo Pro Plus]
+ 0698 VT82C693A [Apollo Pro133 AGP]
+ 0926 VT82C926 [Amazon]
+ 1000 VT82C570MV
+ 1106 VT82C570MV
+ 1571 VT82C416MV
+ 1595 VT82C595/97 [Apollo VP2/97]
+ 3038 USB
+ 0925 1234 USB Controller
+ 1234 0925 MVP3 USB Controller
+ 3040 VT82C586B ACPI
+ 3043 VT86C100A [Rhine]
+ 10bd 0000 VT86C100A Fast Ethernet Adapter
+ 1106 0100 VT86C100A Fast Ethernet Adapter
+ 1186 1400 DFE-530TX rev A
+ 3044 IEEE 1394 Host Controller
+ 3050 VT82C596 Power Management
+ 3051 VT82C596 Power Management
+ 3057 VT82C686 [Apollo Super ACPI]
+ 1043 8033 A7V Mainboard
+ 1043 8042 A7V133/A7V133-C Mainboard
+ 3058 VT82C686 AC97 Audio Controller
+ 0e11 b194 Soundmax integrated digital audio
+ 1106 4511 Onboard Audio on EP7KXA
+ 1458 7600 Onboard Audio
+ 1462 3091 MS-6309 Onboard Audio
+ 15dd 7609 Onboard Audio
+ 3059 VT8233 AC97 Audio Controller
+ 1458 a002 GA-7VAX Onboard Audio (Realtek ALC650)
+ 3065 VT6102 [Rhine-II]
+ 1186 1400 DFE-530TX rev A
+ 1186 1401 DFE-530TX rev B
+ 3068 AC97 Modem Controller
+ 3074 VT8233 PCI to ISA Bridge
+ 3091 VT8633 [Apollo Pro266]
+ 3099 VT8367 [KT266]
+ 1043 8064 A7V266-E
+ 1043 807f A7V333
+ 3101 VT8653 Host Bridge
+ 3102 VT8662 Host Bridge
+ 3103 VT8615 Host Bridge
+ 3104 USB 2.0
+ 1458 5004 GA-7VAX Mainboard
+ 3109 VT8233C PCI to ISA Bridge
+ 3112 VT8361 [KLE133] Host Bridge
+ 3128 VT8753 [P4X266 AGP]
+ 3133 VT3133 Host Bridge
+ 3147 VT8233A ISA Bridge
+ 3148 P4M266 Host Bridge
+ 3156 P/KN266 Host Bridge
+ 3177 VT8233A ISA Bridge
+ 1458 5001 GA-7VAX Mainboard
+ 3189 VT8377 [KT400 AGP] Host Bridge
+ 1458 5000 GA-7VAX Mainboard
+ 5030 VT82C596 ACPI [Apollo PRO]
+ 6100 VT85C100A [Rhine II]
+ 8231 VT8231 [PCI-to-ISA Bridge]
+ 8235 VT8235 ACPI
+ 8305 VT8363/8365 [KT133/KM133 AGP]
+ 8391 VT8371 [KX133 AGP]
+ 8501 VT8501 [Apollo MVP4 AGP]
+ 8596 VT82C596 [Apollo PRO AGP]
+ 8597 VT82C597 [Apollo VP3 AGP]
+ 8598 VT82C598/694x [Apollo MVP3/Pro133x AGP]
+ 8601 VT8601 [Apollo ProMedia AGP]
+ 8605 VT8605 [PM133 AGP]
+ 8691 VT82C691 [Apollo Pro]
+ 8693 VT82C693 [Apollo Pro Plus] PCI Bridge
+ b091 VT8633 [Apollo Pro266 AGP]
+ b099 VT8367 [KT333 AGP]
+ b101 VT8653 AGP Bridge
+ b102 VT8362 AGP Bridge
+ b103 VT8615 AGP Bridge
+ b112 VT8361 [KLE133] AGP Bridge
+ b168 VT8235 PCI Bridge
+1107 Stratus Computers
+ 0576 VIA VT82C570MV [Apollo] (Wrong vendor ID!)
+1108 Proteon, Inc.
+ 0100 p1690plus_AA
+ 0101 p1690plus_AB
+ 0105 P1690Plus
+ 0108 P1690Plus
+ 0138 P1690Plus
+ 0139 P1690Plus
+ 013c P1690Plus
+ 013d P1690Plus
+1109 Cogent Data Technologies, Inc.
+ 1400 EM110TX [EX110TX]
+110a Siemens Nixdorf AG
+ 0002 Piranha 2-port
+ 0005 Tulip controller, power management, switch extender
+ 2102 DSCC4 WAN adapter
+ 4942 FPGA I-Bus Tracer for MBD
+ 6120 SZB6120
+110b Chromatic Research Inc.
+ 0001 Mpact Media Processor
+ 0004 Mpact 2
+110c Mini-Max Technology, Inc.
+110d Znyx Advanced Systems
+110e CPU Technology
+110f Ross Technology
+1110 Powerhouse Systems
+ 6037 Firepower Powerized SMP I/O ASIC
+ 6073 Firepower Powerized SMP I/O ASIC
+1111 Santa Cruz Operation
+# DJ: Some people say that 0x1112 is Rockwell International
+1112 RNS - Div. of Meret Communications Inc
+ 2200 FDDI Adapter
+ 2300 Fast Ethernet Adapter
+ 2340 4 Port Fast Ethernet Adapter
+ 2400 ATM Adapter
+1113 Accton Technology Corporation
+ 1211 SMC2-1211TX
+ 103c 1207 EN-1207D Fast Ethernet Adapter
+ 1113 1211 EN-1207D Fast Ethernet Adapter
+ 1216 EN-1216 Ethernet Adapter
+ 1217 EN-1217 Ethernet Adapter
+ 5105 10Mbps Network card
+ 9211 EN-1207D Fast Ethernet Adapter
+ 1113 9211 EN-1207D Fast Ethernet Adapter
+ 9511 Fast Ethernet Adapter
+1114 Atmel Corporation
+1115 3D Labs
+1116 Data Translation
+ 0022 DT3001
+ 0023 DT3002
+ 0024 DT3003
+ 0025 DT3004
+ 0026 DT3005
+ 0027 DT3001-PGL
+ 0028 DT3003-PGL
+1117 Datacube, Inc
+ 9500 Max-1C SVGA card
+ 9501 Max-1C image processing
+1118 Berg Electronics
+1119 ICP Vortex Computersysteme GmbH
+ 0000 GDT 6000/6020/6050
+ 0001 GDT 6000B/6010
+ 0002 GDT 6110/6510
+ 0003 GDT 6120/6520
+ 0004 GDT 6530
+ 0005 GDT 6550
+ 0006 GDT 6x17
+ 0007 GDT 6x27
+ 0008 GDT 6537
+ 0009 GDT 6557
+ 000a GDT 6115/6515
+ 000b GDT 6125/6525
+ 000c GDT 6535
+ 000d GDT 6555
+ 0100 GDT 6117RP/6517RP
+ 0101 GDT 6127RP/6527RP
+ 0102 GDT 6537RP
+ 0103 GDT 6557RP
+ 0104 GDT 6111RP/6511RP
+ 0105 GDT 6121RP/6521RP
+ 0110 GDT 6117RD/6517RD
+ 0111 GDT 6127RD/6527RD
+ 0112 GDT 6537RD
+ 0113 GDT 6557RD
+ 0114 GDT 6111RD/6511RD
+ 0115 GDT 6121RD/6521RD
+ 0118 GDT 6118RD/6518RD/6618RD
+ 0119 GDT 6128RD/6528RD/6628RD
+ 011a GDT 6538RD/6638RD
+ 011b GDT 6558RD/6658RD
+ 0120 GDT 6117RP2/6517RP2
+ 0121 GDT 6127RP2/6527RP2
+ 0122 GDT 6537RP2
+ 0123 GDT 6557RP2
+ 0124 GDT 6111RP2/6511RP2
+ 0125 GDT 6121RP2/6521RP2
+ 0136 GDT 6113RS/6513RS
+ 0137 GDT 6123RS/6523RS
+ 0138 GDT 6118RS/6518RS/6618RS
+ 0139 GDT 6128RS/6528RS/6628RS
+ 013a GDT 6538RS/6638RS
+ 013b GDT 6558RS/6658RS
+ 013c GDT 6533RS/6633RS
+ 013d GDT 6543RS/6643RS
+ 013e GDT 6553RS/6653RS
+ 013f GDT 6563RS/6663RS
+ 0166 GDT 7113RN/7513RN/7613RN
+ 0167 GDT 7123RN/7523RN/7623RN
+ 0168 GDT 7118RN/7518RN/7618RN
+ 0169 GDT 7128RN/7528RN/7628RN
+ 016a GDT 7538RN/7638RN
+ 016b GDT 7558RN/7658RN
+ 016c GDT 7533RN/7633RN
+ 016d GDT 7543RN/7643RN
+ 016e GDT 7553RN/7653RN
+ 016f GDT 7563RN/7663RN
+ 01d6 GDT 4x13RZ
+ 01d7 GDT 4x23RZ
+ 01f6 GDT 8x13RZ
+ 01f7 GDT 8x23RZ
+ 01fc GDT 8x33RZ
+ 01fd GDT 8x43RZ
+ 01fe GDT 8x53RZ
+ 01ff GDT 8x63RZ
+ 0210 GDT 6519RD/6619RD
+ 0211 GDT 6529RD/6629RD
+ 0260 GDT 7519RN/7619RN
+ 0261 GDT 7529RN/7629RN
+ 0300 GDT Raid Controller
+111a Efficient Networks, Inc
+ 0000 155P-MF1 (FPGA)
+ 0002 155P-MF1 (ASIC)
+ 0003 ENI-25P ATM
+ 111a 0000 ENI-25p Miniport ATM Adapter
+ 0005 SpeedStream (LANAI)
+ 111a 0001 ENI-3010 ATM
+ 111a 0009 ENI-3060 ADSL (VPI=0)
+ 111a 0101 ENI-3010 ATM
+ 111a 0109 ENI-3060CO ADSL (VPI=0)
+ 111a 0809 ENI-3060 ADSL (VPI=0 or 8)
+ 111a 0909 ENI-3060CO ADSL (VPI=0 or 8)
+ 111a 0a09 ENI-3060 ADSL (VPI=<0..15>)
+ 0007 SpeedStream ADSL
+ 111a 1001 ENI-3061 ADSL [ASIC]
+111b Teledyne Electronic Systems
+111c Tricord Systems Inc.
+ 0001 Powerbis Bridge
+111d Integrated Device Tech
+ 0001 IDT77211 ATM Adapter
+ 0003 IDT77252 ATM network controller
+111e Eldec
+111f Precision Digital Images
+ 4a47 Precision MX Video engine interface
+ 5243 Frame capture bus interface
+1120 EMC Corporation
+1121 Zilog
+1122 Multi-tech Systems, Inc.
+1123 Excellent Design, Inc.
+1124 Leutron Vision AG
+1125 Eurocore
+1126 Vigra
+1127 FORE Systems Inc
+ 0200 ForeRunner PCA-200 ATM
+ 0210 PCA-200PC
+ 0250 ATM
+ 0300 ForeRunner PCA-200EPC ATM
+ 0310 ATM
+ 0400 ForeRunnerHE ATM Adapter
+ 1127 0400 ForeRunnerHE ATM
+1129 Firmworks
+112a Hermes Electronics Company, Ltd.
+112b Linotype - Hell AG
+112c Zenith Data Systems
+112d Ravicad
+112e Infomedia Microelectronics Inc.
+112f Imaging Technology Inc
+ 0000 MVC IC-PCI
+ 0001 MVC IM-PCI Video frame grabber/processor
+1130 Computervision
+1131 Philips Semiconductors
+ 7130 SAA7130 Video Broadcast Decoder
+# PCI audio and video broadcast decoder (http://www.semiconductors.philips.com/pip/saa7134hl)
+ 7134 SAA7134
+ 7145 SAA7145
+ 7146 SAA7146
+ 114b 2003 DVRaptor Video Edit/Capture Card
+ 11bd 0006 DV500 Overlay
+ 11bd 000a DV500 Overlay
+1132 Mitel Corp.
+1133 Eicon Technology Corporation
+ 7901 EiconCard S90
+ 7902 EiconCard S90
+ 7911 EiconCard S91
+ 7912 EiconCard S91
+ 7941 EiconCard S94
+ 7942 EiconCard S94
+ 7943 EiconCard S94
+ 7944 EiconCard S94
+ b921 EiconCard P92
+ b922 EiconCard P92
+ b923 EiconCard P92
+ e001 DIVA 20PRO
+ 1133 e001 DIVA Pro 2.0 S/T
+ e002 DIVA 20
+ 1133 e002 DIVA 2.0 S/T
+ e003 DIVA 20PRO_U
+ 1133 e003 DIVA Pro 2.0 U
+ e004 DIVA 20_U
+ 1133 e004 DIVA 2.0 U
+ e005 DIVA LOW
+ 1133 e005 DIVA 2.01 S/T
+ e010 DIVA Server BRI-2M
+ 1133 e010 DIVA Server BRI-2M
+ e012 DIVA Server BRI-8M
+ 1133 e012 DIVA Server BRI-8M
+ e014 DIVA Server PRI-30M
+ 1133 e014 DIVA Server PRI-30M
+ e018 DIVA Server BRI-2M/-2F
+1134 Mercury Computer Systems
+ 0001 Raceway Bridge
+1135 Fuji Xerox Co Ltd
+ 0001 Printer controller
+1136 Momentum Data Systems
+1137 Cisco Systems Inc
+1138 Ziatech Corporation
+ 8905 8905 [STD 32 Bridge]
+1139 Dynamic Pictures, Inc
+ 0001 VGA Compatible 3D Graphics
+113a FWB Inc
+113b Network Computing Devices
+113c Cyclone Microsystems, Inc.
+ 0000 PCI-9060 i960 Bridge
+ 0001 PCI-SDK [PCI i960 Evaluation Platform]
+ 0911 PCI-911 [i960Jx-based Intelligent I/O Controller]
+ 0912 PCI-912 [i960CF-based Intelligent I/O Controller]
+ 0913 PCI-913
+ 0914 PCI-914 [I/O Controller w/ secondary PCI bus]
+113d Leading Edge Products Inc
+113e Sanyo Electric Co - Computer Engineering Dept
+113f Equinox Systems, Inc.
+ 0808 SST-64P Adapter
+ 1010 SST-128P Adapter
+ 80c0 SST-16P DB Adapter
+ 80c4 SST-16P RJ Adapter
+ 80c8 SST-16P Adapter
+ 8888 SST-4P Adapter
+ 9090 SST-8P Adapter
+1140 Intervoice Inc
+1141 Crest Microsystem Inc
+1142 Alliance Semiconductor Corporation
+ 3210 AP6410
+ 6422 ProVideo 6422
+ 6424 ProVideo 6424
+ 6425 ProMotion AT25
+ 643d ProMotion AT3D
+1143 NetPower, Inc
+1144 Cincinnati Milacron
+ 0001 Noservo controller
+1145 Workbit Corporation
+ f007 NinjaSCSI-32 KME
+ 8007 NinjaSCSI-32 Workbit
+ f010 NinjaSCSI-32 Workbit
+ f012 NinjaSCSI-32 Logitec
+ f013 NinjaSCSI-32 Logitec
+ f015 NinjaSCSI-32 Melco
+1146 Force Computers
+1147 Interface Corp
+1148 Syskonnect (Schneider & Koch)
+ 4000 FDDI Adapter
+ 0e11 b03b Netelligent 100 FDDI DAS Fibre SC
+ 0e11 b03c Netelligent 100 FDDI SAS Fibre SC
+ 0e11 b03d Netelligent 100 FDDI DAS UTP
+ 0e11 b03e Netelligent 100 FDDI SAS UTP
+ 0e11 b03f Netelligent 100 FDDI SAS Fibre MIC
+ 1148 5521 FDDI SK-5521 (SK-NET FDDI-UP)
+ 1148 5522 FDDI SK-5522 (SK-NET FDDI-UP DAS)
+ 1148 5541 FDDI SK-5541 (SK-NET FDDI-FP)
+ 1148 5543 FDDI SK-5543 (SK-NET FDDI-LP)
+ 1148 5544 FDDI SK-5544 (SK-NET FDDI-LP DAS)
+ 1148 5821 FDDI SK-5821 (SK-NET FDDI-UP64)
+ 1148 5822 FDDI SK-5822 (SK-NET FDDI-UP64 DAS)
+ 1148 5841 FDDI SK-5841 (SK-NET FDDI-FP64)
+ 1148 5843 FDDI SK-5843 (SK-NET FDDI-LP64)
+ 1148 5844 FDDI SK-5844 (SK-NET FDDI-LP64 DAS)
+ 4200 Token Ring adapter
+ 4300 Gigabit Ethernet
+ 1148 9821 SK-9821 (1000Base-T single link)
+ 1148 9822 SK-9822 (1000Base-T dual link)
+ 1148 9841 SK-9841 (1000Base-LX single link)
+ 1148 9842 SK-9842 (1000Base-LX dual link)
+ 1148 9843 SK-9843 (1000Base-SX single link)
+ 1148 9844 SK-9844 (1000Base-SX dual link)
+ 1148 9861 SK-9861 (1000Base-SX VF45 single link)
+ 1148 9862 SK-9862 (1000Base-SX VF45 dual link)
+ 4400 Gigabit Ethernet
+1149 Win System Corporation
+114a VMIC
+ 5579 VMIPCI-5579 (Reflective Memory Card)
+ 7587 VMIVME-7587
+114b Canopus Co., Ltd
+114c Annabooks
+114d IC Corporation
+114e Nikon Systems Inc
+114f Digi International
+ 0002 AccelePort EPC
+ 0003 RightSwitch SE-6
+ 0004 AccelePort Xem
+ 0005 AccelePort Xr
+ 0006 AccelePort Xr,C/X
+ 0009 AccelePort Xr/J
+ 000a AccelePort EPC/J
+ 000c DataFirePRIme T1 (1-port)
+ 000d SyncPort 2-Port (x.25/FR)
+ 0011 AccelePort 8r EIA-232 (IBM)
+ 0012 AccelePort 8r EIA-422
+ 0013 AccelePort Xr
+ 0014 AccelePort 8r EIA-422
+ 0015 AccelePort Xem
+ 0016 AccelePort EPC/X
+ 0017 AccelePort C/X
+ 001a DataFirePRIme E1 (1-port)
+ 001b AccelePort C/X (IBM)
+ 001d DataFire RAS T1/E1/PRI
+ 114f 0050 DataFire RAS E1 Adapter
+ 114f 0051 DataFire RAS Dual E1 Adapter
+ 114f 0052 DataFire RAS T1 Adapter
+ 114f 0053 DataFire RAS Dual T1 Adapter
+ 0023 AccelePort RAS
+ 0024 DataFire RAS B4 ST/U
+ 114f 0030 DataFire RAS BRI U Adapter
+ 114f 0031 DataFire RAS BRI S/T Adapter
+ 0026 AccelePort 4r 920
+ 0027 AccelePort Xr 920
+ 0034 AccelePort 2r 920
+ 0035 DataFire DSP T1/E1/PRI cPCI
+ 0040 AccelePort Xp
+ 0042 AccelePort 2p PCI
+ 0070 Datafire Micro V IOM2 (Europe)
+ 0071 Datafire Micro V (Europe)
+ 0072 Datafire Micro V IOM2 (North America)
+ 0073 Datafire Micro V (North America)
+ 6001 Avanstar
+1150 Thinking Machines Corp
+1151 JAE Electronics Inc.
+1152 Megatek
+1153 Land Win Electronic Corp
+1154 Melco Inc
+1155 Pine Technology Ltd
+1156 Periscope Engineering
+1157 Avsys Corporation
+1158 Voarx R & D Inc
+ 3011 Tokenet/vg 1001/10m anylan
+ 9050 Lanfleet/Truevalue
+ 9051 Lanfleet/Truevalue
+1159 Mutech Corp
+ 0001 MV-1000
+115a Harlequin Ltd
+115b Parallax Graphics
+115c Photron Ltd.
+115d Xircom
+ 0003 Cardbus Ethernet 10/100
+ 1014 0181 10/100 EtherJet Cardbus Adapter
+ 1014 1181 10/100 EtherJet Cardbus Adapter
+ 1014 8181 10/100 EtherJet Cardbus Adapter
+ 1014 9181 10/100 EtherJet Cardbus Adapter
+ 115d 0181 Cardbus Ethernet 10/100
+ 115d 1181 Cardbus Ethernet 10/100
+ 1179 0181 Cardbus Ethernet 10/100
+ 8086 8181 EtherExpress PRO/100 Mobile CardBus 32 Adapter
+ 8086 9181 EtherExpress PRO/100 Mobile CardBus 32 Adapter
+ 0005 Cardbus Ethernet 10/100
+ 1014 0182 10/100 EtherJet Cardbus Adapter
+ 1014 1182 10/100 EtherJet Cardbus Adapter
+ 115d 0182 Cardbus Ethernet 10/100
+ 115d 1182 Cardbus Ethernet 10/100
+ 0007 Cardbus Ethernet 10/100
+ 1014 0182 10/100 EtherJet Cardbus Adapter
+ 1014 1182 10/100 EtherJet Cardbus Adapter
+ 115d 0182 Cardbus Ethernet 10/100
+ 115d 1182 Cardbus Ethernet 10/100
+ 000b Cardbus Ethernet 10/100
+ 1014 0183 10/100 EtherJet Cardbus Adapter
+ 115d 0183 Cardbus Ethernet 10/100
+ 000c Mini-PCI V.90 56k Modem
+ 000f Cardbus Ethernet 10/100
+ 1014 0183 10/100 EtherJet Cardbus Adapter
+ 115d 0183 Cardbus Ethernet 10/100
+ 0101 Cardbus 56k modem
+ 115d 1081 Cardbus 56k Modem
+ 0103 Cardbus Ethernet + 56k Modem
+ 1014 9181 Cardbus 56k Modem
+ 1115 1181 Cardbus Ethernet 100 + 56k Modem
+ 115d 1181 CBEM56G-100 Ethernet + 56k Modem
+ 8086 9181 PRO/100 LAN + Modem56 CardBus
+115e Peer Protocols Inc
+115f Maxtor Corporation
+1160 Megasoft Inc
+1161 PFU Limited
+1162 OA Laboratory Co Ltd
+1163 Rendition
+ 0001 Verite 1000
+ 2000 Verite V2000/V2100/V2200
+ 1092 2000 Stealth II S220
+1164 Advanced Peripherals Technologies
+1165 Imagraph Corporation
+ 0001 Motion TPEG Recorder/Player with audio
+1166 ServerWorks
+ 0005 CNB20-LE Host Bridge
+ 0007 CNB20-LE Host Bridge
+ 0008 CNB20HE Host Bridge
+ 0009 CNB20LE Host Bridge
+ 0010 CIOB30
+ 0011 CMIC-HE
+ 0013 CNB20-HE Host Bridge
+ 0014 CNB20-HE Host Bridge
+ 0015 CMIC-GC Host Bridge
+ 0016 CMIC-GC Host Bridge
+ 0017 GCNB-LE Host Bridge
+ 0200 OSB4 South Bridge
+ 0201 CSB5 South Bridge
+ 0203 CSB6 South Bridge
+ 0211 OSB4 IDE Controller
+ 0212 CSB5 IDE Controller
+ 0213 CSB6 RAID/IDE Controller
+ 0220 OSB4/CSB5 OHCI USB Controller
+ 0221 CSB6 OHCI USB Controller
+ 0225 GCLE Host Bridge
+ 0227 GCLE-2 Host Bridge
+1167 Mutoh Industries Inc
+1168 Thine Electronics Inc
+1169 Centre for Development of Advanced Computing
+116a Polaris Communications
+ 6100 Bus/Tag Channel
+ 6800 Escon Channel
+ 7100 Bus/Tag Channel
+ 7800 Escon Channel
+116b Connectware Inc
+116c Intelligent Resources Integrated Systems
+116d Martin-Marietta
+116e Electronics for Imaging
+116f Workstation Technology
+1170 Inventec Corporation
+1171 Loughborough Sound Images Plc
+1172 Altera Corporation
+1173 Adobe Systems, Inc
+1174 Bridgeport Machines
+1175 Mitron Computer Inc.
+1176 SBE Incorporated
+1177 Silicon Engineering
+1178 Alfa, Inc.
+ afa1 Fast Ethernet Adapter
+1179 Toshiba America Info Systems
+ 0103 EX-IDE Type-B
+ 0404 DVD Decoder card
+ 0406 Tecra Video Capture device
+ 0407 DVD Decoder card (Version 2)
+ 0601 601
+ 0603 ToPIC95 PCI to CardBus Bridge for Notebooks
+ 060a ToPIC95
+ 060f ToPIC97
+ 0617 ToPIC95 PCI to Cardbus Bridge with ZV Support
+ 0618 CPU to PCI and PCI to ISA bridge
+# Claimed to be Lucent DSP1645 [Mars], but that's apparently incorrect. Does anyone know the correct ID?
+ 0701 FIR Port
+ 0804 TC6371AF SmartMedia Controller
+ 0805 SD TypA Controller
+ 0d01 FIR Port Type-DO
+ 1179 0001 FIR Port Type-DO
+117a A-Trend Technology
+117b L G Electronics, Inc.
+117c Atto Technology
+117d Becton & Dickinson
+117e T/R Systems
+117f Integrated Circuit Systems
+1180 Ricoh Co Ltd
+ 0465 RL5c465
+ 0466 RL5c466
+ 0475 RL5c475
+ 0476 RL5c476 II
+ 104d 80df Vaio PCG-FX403
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 0477 RL5c477
+ 0478 RL5c478
+ 1014 0184 ThinkPad A30p (2653-64G)
+ 0522 R5C522 IEEE 1394 Controller
+ 1014 01cf ThinkPad A30p (2653-64G)
+ 0551 R5C551 IEEE 1394 Controller
+ 0552 R5C552 IEEE 1394 Controller
+1181 Telmatics International
+1183 Fujikura Ltd
+1184 Forks Inc
+1185 Dataworld International Ltd
+1186 D-Link System Inc
+ 0100 DC21041
+ 1002 DL10050 Sundance Ethernet
+ 1186 1002 DFE-550TX
+ 1186 1012 DFE-580TX
+ 1300 RTL8139 Ethernet
+ 1186 1300 DFE-538TX 10/100 Ethernet Adapter
+ 1186 1301 DFE-530TX+ 10/100 Ethernet Adapter
+ 1340 DFE-690TXD CardBus PC Card
+ 1561 DRP-32TXD Cardbus PC Card
+ 4000 DL2K Ethernet
+1187 Advanced Technology Laboratories, Inc.
+1188 Shima Seiki Manufacturing Ltd.
+1189 Matsushita Electronics Co Ltd
+118a Hilevel Technology
+118b Hypertec Pty Limited
+118c Corollary, Inc
+ 0014 PCIB [C-bus II to PCI bus host bridge chip]
+ 1117 Intel 8-way XEON Profusion Chipset [Cache Coherency Filter]
+118d BitFlow Inc
+ 0001 Raptor-PCI framegrabber
+ 0012 Model 12 Road Runner Frame Grabber
+ 0014 Model 14 Road Runner Frame Grabber
+ 0024 Model 24 Road Runner Frame Grabber
+ 0044 Model 44 Road Runner Frame Grabber
+ 0112 Model 12 Road Runner Frame Grabber
+ 0114 Model 14 Road Runner Frame Grabber
+ 0124 Model 24 Road Runner Frame Grabber
+ 0144 Model 44 Road Runner Frame Grabber
+ 0212 Model 12 Road Runner Frame Grabber
+ 0214 Model 14 Road Runner Frame Grabber
+ 0224 Model 24 Road Runner Frame Grabber
+ 0244 Model 44 Road Runner Frame Grabber
+ 0312 Model 12 Road Runner Frame Grabber
+ 0314 Model 14 Road Runner Frame Grabber
+ 0324 Model 24 Road Runner Frame Grabber
+ 0344 Model 44 Road Runner Frame Grabber
+118e Hermstedt GmbH
+118f Green Logic
+1190 Tripace
+ c731 TP-910/920/940 PCI Ultra(Wide) SCSI Adapter
+1191 Artop Electronic Corp
+ 0003 SCSI Cache Host Adapter
+ 0004 ATP8400
+ 0005 ATP850UF
+ 0006 ATP860 NO-BIOS
+ 0007 ATP860
+ 0008 ATP865 NO-ROM
+ 0009 ATP865
+ 8002 AEC6710 SCSI-2 Host Adapter
+ 8010 AEC6712UW SCSI
+ 8020 AEC6712U SCSI
+ 8030 AEC6712S SCSI
+ 8040 AEC6712D SCSI
+ 8050 AEC6712SUW SCSI
+1192 Densan Company Ltd
+1193 Zeitnet Inc.
+ 0001 1221
+ 0002 1225
+1194 Toucan Technology
+1195 Ratoc System Inc
+1196 Hytec Electronics Ltd
+1197 Gage Applied Sciences, Inc.
+1198 Lambda Systems Inc
+1199 Attachmate Corporation
+119a Mind Share, Inc.
+119b Omega Micro Inc.
+ 1221 82C092G
+119c Information Technology Inst.
+119d Bug, Inc. Sapporo Japan
+119e Fujitsu Microelectronics Ltd.
+ 0001 FireStream 155
+ 0003 FireStream 50
+119f Bull HN Information Systems
+11a0 Convex Computer Corporation
+11a1 Hamamatsu Photonics K.K.
+11a2 Sierra Research and Technology
+11a3 Deuretzbacher GmbH & Co. Eng. KG
+11a4 Barco Graphics NV
+11a5 Microunity Systems Eng. Inc
+11a6 Pure Data Ltd.
+11a7 Power Computing Corp.
+11a8 Systech Corp.
+11a9 InnoSys Inc.
+ 4240 AMCC S933Q Intelligent Serial Card
+11aa Actel
+11ab Galileo Technology Ltd.
+ 0146 GT-64010
+ 4801 GT-48001
+ f003 GT-64010 Primary Image Piranha Image Generator
+11ac Canon Information Systems Research Aust.
+11ad Lite-On Communications Inc
+ 0002 LNE100TX
+ 11ad 0002 LNE100TX
+ 11ad 0003 LNE100TX
+ 11ad f003 LNE100TX
+ 11ad ffff LNE100TX
+ 1385 f004 FA310TX
+ c115 LNE100TX [Linksys EtherFast 10/100]
+ 11ad c001 LNE100TX [ver 2.0]
+11ae Aztech System Ltd
+11af Avid Technology Inc.
+11b0 V3 Semiconductor Inc.
+ 0002 V300PSC
+ 0292 V292PBC [Am29030/40 Bridge]
+ 0960 V96xPBC
+ c960 V96DPC
+11b1 Apricot Computers
+11b2 Eastman Kodak
+11b3 Barr Systems Inc.
+11b4 Leitch Technology International
+11b5 Radstone Technology Plc
+11b6 United Video Corp
+11b7 Motorola
+11b8 XPoint Technologies, Inc
+ 0001 Quad PeerMaster
+11b9 Pathlight Technology Inc.
+ c0ed SSA Controller
+11ba Videotron Corp
+11bb Pyramid Technology
+11bc Network Peripherals Inc
+ 0001 NP-PCI
+11bd Pinnacle Systems Inc.
+11be International Microcircuits Inc
+11bf Astrodesign, Inc.
+11c0 Hewlett Packard
+11c1 Lucent Microelectronics
+ 0440 56k WinModem
+ 0001 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 1033 8015 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 1033 8047 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 1033 804f LT WinModem 56k Data+Fax+Voice+Dsvd
+ 10cf 102c LB LT Modem V.90 56k
+ 10cf 104a BIBLO LT Modem 56k
+ 10cf 105f LB2 LT Modem V.90 56k
+ 1179 0001 Internal V.90 Modem
+ 11c1 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 122d 4101 MDP7800-U Modem
+ 122d 4102 MDP7800SP-U Modem
+ 13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 0441 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 0450 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 f100 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 f101 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 144d 2101 LT56PV Modem
+ 149f 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 0441 56k WinModem
+ 1033 804d LT WinModem 56k Data+Fax
+ 1033 8065 LT WinModem 56k Data+Fax
+ 1092 0440 Supra 56i
+ 1179 0001 Internal V.90 Modem
+ 11c1 0440 LT WinModem 56k Data+Fax
+ 11c1 0441 LT WinModem 56k Data+Fax
+ 122d 4100 MDP7800-U Modem
+ 13e0 0040 LT WinModem 56k Data+Fax
+ 13e0 0100 LT WinModem 56k Data+Fax
+ 13e0 0410 LT WinModem 56k Data+Fax
+ 13e0 0420 TelePath Internet 56k WinModem
+ 13e0 0440 LT WinModem 56k Data+Fax
+ 13e0 0443 LT WinModem 56k Data+Fax
+ 13e0 f102 LT WinModem 56k Data+Fax
+ 1416 9804 CommWave 56k Modem
+ 141d 0440 LT WinModem 56k Data+Fax
+ 144f 0441 Lucent 56k V.90 DF Modem
+ 144f 0449 Lucent 56k V.90 DF Modem
+ 144f 110d Lucent Win Modem
+ 1468 0441 Presario 56k V.90 DF Modem
+ 1668 0440 Lucent Win Modem
+ 0442 56k WinModem
+ 0001 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 11c1 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 11c1 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 13e0 0412 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 13e0 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 13fc 2471 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 144d 2104 LT56PT Modem
+ 144f 1104 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 149f 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 1668 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 0443 LT WinModem
+ 0444 LT WinModem
+ 0445 LT WinModem
+ 0446 LT WinModem
+ 0447 LT WinModem
+ 0448 WinModem 56k
+ 1014 0131 Lucent Win Modem
+ 1033 8066 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 0030 56k Voice Modem
+ 13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd
+# Actiontech eth+modem card as used by Dell &c.
+ 1668 2400 LT WinModem 56k (MiniPCI Ethernet+Modem)
+ 0449 WinModem 56k
+ 0e11 b14d 56k V.90 Modem
+ 13e0 0020 LT WinModem 56k Data+Fax
+ 13e0 0041 TelePath Internet 56k WinModem
+ 1436 0440 Lucent Win Modem
+ 144f 0449 Lucent 56k V.90 DFi Modem
+ 1468 0410 IBM ThinkPad T23 (2647-4MG)
+ 1468 0440 Lucent Win Modem
+ 1468 0449 Presario 56k V.90 DFi Modem
+ 044a F-1156IV WinModem (V90, 56KFlex)
+ 10cf 1072 LB Global LT Modem
+ 13e0 0012 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 13e0 0042 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 144f 1005 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 044b LT WinModem
+ 044c LT WinModem
+ 044d LT WinModem
+ 044e LT WinModem
+ 044f V90 WildWire Modem
+ 0450 LT WinModem
+ 0451 LT WinModem
+ 0452 LT WinModem
+ 0453 LT WinModem
+ 0454 LT WinModem
+ 0455 LT WinModem
+ 0456 LT WinModem
+ 0457 LT WinModem
+ 0458 LT WinModem
+ 0459 LT WinModem
+ 045a LT WinModem
+ 0461 V90 WildWire Modem
+ 0462 V90 WildWire Modem
+ 0480 Venus Modem (V90, 56KFlex)
+ 5801 USB
+ 5802 USS-312 USB Controller
+ 5811 FW323
+ dead 0800 FireWire Host Bus Adapter
+11c2 Sand Microelectronics
+11c3 NEC Corp
+11c4 Document Technologies, Inc
+11c5 Shiva Corporation
+11c6 Dainippon Screen Mfg. Co. Ltd
+11c7 D.C.M. Data Systems
+11c8 Dolphin Interconnect Solutions AS
+ 0658 PSB32 SCI-Adapter D31x
+ d665 PSB64 SCI-Adapter D32x
+ d667 PSB66 SCI-Adapter D33x
+11c9 Magma
+ 0010 16-line serial port w/- DMA
+ 0011 4-line serial port w/- DMA
+11ca LSI Systems, Inc
+11cb Specialix Research Ltd.
+ 2000 PCI_9050
+ 11cb 0200 SX
+ 11cb b008 I/O8+
+ 4000 SUPI_1
+ 8000 T225
+11cc Michels & Kleberhoff Computer GmbH
+11cd HAL Computer Systems, Inc.
+11ce Netaccess
+11cf Pioneer Electronic Corporation
+11d0 Lockheed Martin Federal Systems-Manassas
+11d1 Auravision
+ 01f7 VxP524
+11d2 Intercom Inc.
+11d3 Trancell Systems Inc
+11d4 Analog Devices
+ 1805 SM56 PCI modem
+ 1889 AD1889 sound chip
+11d5 Ikon Corporation
+ 0115 10115
+ 0117 10117
+11d6 Tekelec Telecom
+11d7 Trenton Technology, Inc.
+11d8 Image Technologies Development
+11d9 TEC Corporation
+11da Novell
+11db Sega Enterprises Ltd
+11dc Questra Corporation
+11dd Crosfield Electronics Limited
+11de Zoran Corporation
+ 6057 ZR36057PQC Video cutting chipset
+ 1031 7efe DC10 Plus
+ 1031 fc00 MiroVIDEO DC50, Motion JPEG Capture/CODEC Board
+ 13ca 4231 JPEG/TV Card
+ 6120 ZR36120
+ 1328 f001 Cinemaster C DVD Decoder
+11df New Wave PDG
+11e0 Cray Communications A/S
+11e1 GEC Plessey Semi Inc.
+11e2 Samsung Information Systems America
+11e3 Quicklogic Corporation
+11e4 Second Wave Inc
+11e5 IIX Consulting
+11e6 Mitsui-Zosen System Research
+11e7 Toshiba America, Elec. Company
+11e8 Digital Processing Systems Inc.
+11e9 Highwater Designs Ltd.
+11ea Elsag Bailey
+11eb Formation Inc.
+11ec Coreco Inc
+11ed Mediamatics
+11ee Dome Imaging Systems Inc
+11ef Nicolet Technologies B.V.
+11f0 Compu-Shack
+ 4231 FDDI
+ 4232 FASTline UTP Quattro
+ 4233 FASTline FO
+ 4234 FASTline UTP
+ 4235 FASTline-II UTP
+ 4236 FASTline-II FO
+ 4731 GIGAline
+11f1 Symbios Logic Inc
+11f2 Picture Tel Japan K.K.
+11f3 Keithley Metrabyte
+11f4 Kinetic Systems Corporation
+ 2915 CAMAC controller
+11f5 Computing Devices International
+11f6 Compex
+ 0112 ENet100VG4
+ 0113 FreedomLine 100
+ 1401 ReadyLink 2000
+ 2011 RL100-ATX 10/100
+ 11f6 2011 RL100-ATX
+ 2201 ReadyLink 100TX (Winbond W89C840)
+ 11f6 2011 ReadyLink 100TX
+ 9881 RL100TX
+11f7 Scientific Atlanta
+11f8 PMC-Sierra Inc.
+ 7375 PM7375 [LASAR-155 ATM SAR]
+11f9 I-Cube Inc
+11fa Kasan Electronics Company, Ltd.
+11fb Datel Inc
+11fc Silicon Magic
+11fd High Street Consultants
+11fe Comtrol Corporation
+ 0001 RocketPort 8 Oct
+ 0002 RocketPort 8 Intf
+ 0003 RocketPort 16 Intf
+ 0004 RocketPort 32 Intf
+ 0005 RocketPort Octacable
+ 0006 RocketPort 8J
+ 0007 RocketPort 4-port
+ 0008 RocketPort 8-port
+ 0009 RocketPort 16-port
+ 000a RocketPort Plus Quadcable
+ 000b RocketPort Plus Octacable
+ 000c RocketPort 8-port Modem
+ 8015 RocketPort 4-port UART 16954
+11ff Scion Corporation
+1200 CSS Corporation
+1201 Vista Controls Corp
+1202 Network General Corp.
+1203 Bayer Corporation, Agfa Division
+1204 Lattice Semiconductor Corporation
+1205 Array Corporation
+1206 Amdahl Corporation
+1208 Parsytec GmbH
+ 4853 HS-Link Device
+1209 SCI Systems Inc
+120a Synaptel
+120b Adaptive Solutions
+120c Technical Corp.
+120d Compression Labs, Inc.
+120e Cyclades Corporation
+ 0100 Cyclom-Y below first megabyte
+ 0101 Cyclom-Y above first megabyte
+ 0102 Cyclom-4Y below first megabyte
+ 0103 Cyclom-4Y above first megabyte
+ 0104 Cyclom-8Y below first megabyte
+ 0105 Cyclom-8Y above first megabyte
+ 0200 Cyclades-Z below first megabyte
+ 0201 Cyclades-Z above first megabyte
+ 0300 PC300/RSV or /X21 (2 ports)
+ 0301 PC300/RSV or /X21 (1 port)
+ 0310 PC300/TE (2 ports)
+ 0311 PC300/TE (1 port)
+ 0320 PC300/TE-M (2 ports)
+ 0321 PC300/TE-M (1 port)
+ 0400 PC400
+120f Essential Communications
+ 0001 Roadrunner serial HIPPI
+1210 Hyperparallel Technologies
+1211 Braintech Inc
+1212 Kingston Technology Corp.
+1213 Applied Intelligent Systems, Inc.
+1214 Performance Technologies, Inc.
+1215 Interware Co., Ltd
+1216 Purup Prepress A/S
+1217 O2 Micro, Inc.
+ 6729 OZ6729
+ 673a OZ6730
+ 6832 OZ6832/6833 Cardbus Controller
+ 6836 OZ6836/6860 Cardbus Controller
+ 6872 OZ6812 Cardbus Controller
+ 6925 OZ6922 Cardbus Controller
+ 6933 OZ6933 Cardbus Controller
+ 1025 1016 Travelmate 612 TX
+ 6972 OZ6912 Cardbus Controller
+1218 Hybricon Corp.
+1219 First Virtual Corporation
+121a 3Dfx Interactive, Inc.
+ 0001 Voodoo
+ 0002 Voodoo 2
+ 0003 Voodoo Banshee
+ 1092 0003 Monster Fusion
+ 1092 4000 Monster Fusion
+ 1092 4002 Monster Fusion
+ 1092 4801 Monster Fusion AGP
+ 1092 4803 Monster Fusion AGP
+ 1092 8030 Monster Fusion
+ 1092 8035 Monster Fusion AGP
+ 10b0 0001 Dragon 4000
+ 1102 1018 3D Blaster Banshee VE
+ 121a 0001 Voodoo Banshee AGP
+ 121a 0003 Voodoo Banshee AGP SGRAM
+ 121a 0004 Voodoo Banshee
+ 139c 0016 Raven
+ 139c 0017 Raven
+ 14af 0002 Maxi Gamer Phoenix
+ 3030 3030 Skywell Magic TwinPower
+ 0004 Voodoo Banshee [Velocity 100]
+ 0005 Voodoo 3
+ 121a 0004 Voodoo3 AGP
+ 121a 0030 Voodoo3 AGP
+ 121a 0031 Voodoo3 AGP
+ 121a 0034 Voodoo3 AGP
+ 121a 0036 Voodoo3
+ 121a 0037 Voodoo3 AGP
+ 121a 0038 Voodoo3 AGP
+ 121a 003a Voodoo3 AGP
+ 121a 0044 Voodoo3
+ 121a 004b Velocity 100
+ 121a 004c Velocity 200
+ 121a 004d Voodoo3 AGP
+ 121a 004e Voodoo3 AGP
+ 121a 0051 Voodoo3 AGP
+ 121a 0052 Voodoo3 AGP
+ 121a 0060 Voodoo3 3500 TV (NTSC)
+ 121a 0061 Voodoo3 3500 TV (PAL)
+ 121a 0062 Voodoo3 3500 TV (SECAM)
+ 0009 Voodoo 4 / Voodoo 5
+ 121a 0009 Voodoo5 AGP 5500/6000
+ 0057 Voodoo 3/3000 [Avenger]
+121b Advanced Telecommunications Modules
+121c Nippon Texaco., Ltd
+121d Lippert Automationstechnik GmbH
+121e CSPI
+121f Arcus Technology, Inc.
+1220 Ariel Corporation
+ 1220 AMCC 5933 TMS320C80 DSP/Imaging board
+1221 Contec Co., Ltd
+1222 Ancor Communications, Inc.
+1223 Artesyn Communication Products
+ 0003 PM/Link
+ 0004 PM/T1
+ 0005 PM/E1
+ 0008 PM/SLS
+ 0009 BajaSpan Resource Target
+ 000a BajaSpan Section 0
+ 000b BajaSpan Section 1
+ 000c BajaSpan Section 2
+ 000d BajaSpan Section 3
+ 000e PM/PPC
+1224 Interactive Images
+1225 Power I/O, Inc.
+1227 Tech-Source
+1228 Norsk Elektro Optikk A/S
+1229 Data Kinesis Inc.
+122a Integrated Telecom
+122b LG Industrial Systems Co., Ltd
+122c Sican GmbH
+122d Aztech System Ltd
+ 1206 368DSP
+ 50dc 3328 Audio
+ 122d 0001 3328 Audio
+ 80da 3328 Audio
+ 122d 0001 3328 Audio
+122e Xyratex
+122f Andrew Corporation
+1230 Fishcamp Engineering
+1231 Woodward McCoach, Inc.
+1232 GPT Limited
+1233 Bus-Tech, Inc.
+1234 Technical Corp.
+1235 Risq Modular Systems, Inc.
+1236 Sigma Designs Corporation
+ 0000 RealMagic64/GX
+ 6401 REALmagic 64/GX (SD 6425)
+1237 Alta Technology Corporation
+1238 Adtran
+1239 3DO Company
+123a Visicom Laboratories, Inc.
+123b Seeq Technology, Inc.
+123c Century Systems, Inc.
+123d Engineering Design Team, Inc.
+ 0000 EasyConnect 8/32
+ 0002 EasyConnect 8/64
+ 0003 EasyIO
+123e Simutech, Inc.
+123f C-Cube Microsystems
+ 00e4 MPEG
+ 8120 E4?
+ 11bd 0006 DV500 E4
+ 11bd 000a DV500 E4
+ 8888 Cinemaster C 3.0 DVD Decoder
+ 1002 0001 Cinemaster C 3.0 DVD Decoder
+ 1002 0002 Cinemaster C 3.0 DVD Decoder
+ 1328 0001 Cinemaster C 3.0 DVD Decoder
+1240 Marathon Technologies Corp.
+1241 DSC Communications
+1242 Jaycor Networks, Inc.
+ 1242 JNI Corporation (formerly Jaycor Networks, Inc.)
+ 4643 FCI-1063 Fibre Channel Adapter
+ 6562 FCX2-6562 Dual Channel PCI-X Fibre Channel Adapter
+ 656a FCX-6562 PCI-X Fibre Channel Adapter
+1243 Delphax
+1244 AVM Audiovisuelles MKTG & Computer System GmbH
+ 0700 B1 ISDN
+ 0800 C4 ISDN
+ 0a00 A1 ISDN [Fritz]
+ 1244 0a00 FRITZ!Card ISDN Controller
+ 0e00 Fritz!PCI v2.0 ISDN
+ 1100 C2 ISDN
+ 1200 T1 ISDN
+1245 A.P.D., S.A.
+1246 Dipix Technologies, Inc.
+1247 Xylon Research, Inc.
+1248 Central Data Corporation
+1249 Samsung Electronics Co., Ltd.
+124a AEG Electrocom GmbH
+124b SBS/Greenspring Modular I/O
+ 0040 cPCI-200 Four Slot IndustryPack carrier
+ 124b 9080 PCI9080 Bridge
+124c Solitron Technologies, Inc.
+124d Stallion Technologies, Inc.
+ 0000 EasyConnection 8/32
+ 0002 EasyConnection 8/64
+ 0003 EasyIO
+ 0004 EasyConnection/RA
+124e Cylink
+124f Infotrend Technology, Inc.
+ 0041 IFT-2000 Series RAID Controller
+1250 Hitachi Microcomputer System Ltd
+1251 VLSI Solutions Oy
+1253 Guzik Technical Enterprises
+1254 Linear Systems Ltd.
+1255 Optibase Ltd
+ 1110 MPEG Forge
+ 1210 MPEG Fusion
+ 2110 VideoPlex
+ 2120 VideoPlex CC
+ 2130 VideoQuest
+1256 Perceptive Solutions, Inc.
+ 4201 PCI-2220I
+ 4401 PCI-2240I
+ 5201 PCI-2000
+1257 Vertex Networks, Inc.
+1258 Gilbarco, Inc.
+1259 Allied Telesyn International
+ 2560 AT-2560 Fast Ethernet Adapter (i82557B)
+125a ABB Power Systems
+125b Asix Electronics Corporation
+ 1400 ALFA GFC2204
+125c Aurora Technologies, Inc.
+ 0640 Aries 16000P
+125d ESS Technology
+ 0000 ES336H Fax Modem (Early Model)
+ 1948 Solo?
+ 1968 ES1968 Maestro 2
+ 1028 0085 ES1968 Maestro-2 PCI
+ 1033 8051 ES1968 Maestro-2 Audiodrive
+ 1969 ES1969 Solo-1 Audiodrive
+ 1014 0166 ES1969 SOLO-1 AudioDrive on IBM Aptiva Mainboard
+ 125d 8888 Solo-1 Audio Adapter
+ 525f c888 ES1969 SOLO-1 AudioDrive (+ES1938)
+ 1978 ES1978 Maestro 2E
+ 1033 803c ES1978 Maestro-2E Audiodrive
+ 1033 8058 ES1978 Maestro-2E Audiodrive
+ 1092 4000 Monster Sound MX400
+ 1179 0001 ES1978 Maestro-2E Audiodrive
+ 1988 ES1988 Allegro-1
+ 1092 4100 Sonic Impact S100
+ 125d 1988 ESS Allegro-1 Audiodrive
+ 1989 ESS Modem
+ 125d 1989 ESS Modem
+ 1998 ES1983S Maestro-3i PCI Audio Accelerator
+ 1028 00e6 ES1983S Maestro-3i (Dell Inspiron 8100)
+ 1999 ES1983S Maestro-3i PCI Modem Accelerator
+ 199a ES1983S Maestro-3i PCI Audio Accelerator
+ 199b ES1983S Maestro-3i PCI Modem Accelerator
+ 2808 ES336H Fax Modem (Later Model)
+ 2838 ES2838/2839 SuperLink Modem
+ 2898 ES2898 Modem
+ 125d 0424 ES56-PI Data Fax Modem
+ 125d 0425 ES56T-PI Data Fax Modem
+ 125d 0426 ES56V-PI Data Fax Modem
+ 125d 0427 VW-PI Data Fax Modem
+ 125d 0428 ES56ST-PI Data Fax Modem
+ 125d 0429 ES56SV-PI Data Fax Modem
+ 147a c001 ES56-PI Data Fax Modem
+ 14fe 0428 ES56-PI Data Fax Modem
+ 14fe 0429 ES56-PI Data Fax Modem
+125e Specialvideo Engineering SRL
+125f Concurrent Technologies, Inc.
+1260 Harris Semiconductor
+ 3873 Prism 2.5 Wavelan chipset
+ 1186 3501 DWL-520 Wireless PCI Adapter
+ 8130 HMP8130 NTSC/PAL Video Decoder
+ 8131 HMP8131 NTSC/PAL Video Decoder
+1261 Matsushita-Kotobuki Electronics Industries, Ltd.
+1262 ES Computer Company, Ltd.
+1263 Sonic Solutions
+1264 Aval Nagasaki Corporation
+1265 Casio Computer Co., Ltd.
+1266 Microdyne Corporation
+ 0001 NE10/100 Adapter (i82557B)
+ 1910 NE2000Plus (RT8029) Ethernet Adapter
+ 1266 1910 NE2000Plus Ethernet Adapter
+1267 S. A. Telecommunications
+ 5352 PCR2101
+ 5a4b Telsat Turbo
+1268 Tektronix
+1269 Thomson-CSF/TTM
+126a Lexmark International, Inc.
+126b Adax, Inc.
+126c Northern Telecom
+126d Splash Technology, Inc.
+126e Sumitomo Metal Industries, Ltd.
+126f Silicon Motion, Inc.
+ 0710 SM710 LynxEM
+ 0712 SM712 LynxEM+
+ 0720 SM720 Lynx3DM
+ 0810 SM810 LynxE
+ 0811 SM811 LynxE
+ 0820 SM820 Lynx3D
+ 0910 SM910
+1270 Olympus Optical Co., Ltd.
+1271 GW Instruments
+1272 Telematics International
+1273 Hughes Network Systems
+ 0002 DirecPC
+1274 Ensoniq
+ 1371 ES1371 [AudioPCI-97]
+ 0e11 0024 AudioPCI on Motherboard Compaq Deskpro
+ 0e11 b1a7 ES1371, ES1373 AudioPCI
+ 1033 80ac ES1371, ES1373 AudioPCI
+ 1042 1854 Tazer
+ 107b 8054 Tabor2
+ 1274 1371 Creative Sound Blaster AudioPCI64V, AudioPCI128
+ 1462 6470 ES1371, ES1373 AudioPCI On Motherboard MS-6147 1.1A
+ 1462 6560 ES1371, ES1373 AudioPCI On Motherboard MS-6156 1.10
+ 1462 6630 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 1.0A
+ 1462 6631 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 1.0A
+ 1462 6632 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 2.0A
+ 1462 6633 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 2.0A
+ 1462 6820 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00
+ 1462 6822 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00A
+ 1462 6830 ES1371, ES1373 AudioPCI On Motherboard MS-6183 1.00
+ 1462 6880 ES1371, ES1373 AudioPCI On Motherboard MS-6188 1.00
+ 1462 6900 ES1371, ES1373 AudioPCI On Motherboard MS-6190 1.00
+ 1462 6910 ES1371, ES1373 AudioPCI On Motherboard MS-6191
+ 1462 6930 ES1371, ES1373 AudioPCI On Motherboard MS-6193
+ 1462 6990 ES1371, ES1373 AudioPCI On Motherboard MS-6199BX 2.0A
+ 1462 6991 ES1371, ES1373 AudioPCI On Motherboard MS-6199VIA 2.0A
+ 14a4 2077 ES1371, ES1373 AudioPCI On Motherboard KR639
+ 14a4 2105 ES1371, ES1373 AudioPCI On Motherboard MR800
+ 14a4 2107 ES1371, ES1373 AudioPCI On Motherboard MR801
+ 14a4 2172 ES1371, ES1373 AudioPCI On Motherboard DR739
+ 1509 9902 ES1371, ES1373 AudioPCI On Motherboard KW11
+ 1509 9903 ES1371, ES1373 AudioPCI On Motherboard KW31
+ 1509 9904 ES1371, ES1373 AudioPCI On Motherboard KA11
+ 1509 9905 ES1371, ES1373 AudioPCI On Motherboard KC13
+ 152d 8801 ES1371, ES1373 AudioPCI On Motherboard CP810E
+ 152d 8802 ES1371, ES1373 AudioPCI On Motherboard CP810
+ 152d 8803 ES1371, ES1373 AudioPCI On Motherboard P3810E
+ 152d 8804 ES1371, ES1373 AudioPCI On Motherboard P3810-S
+ 152d 8805 ES1371, ES1373 AudioPCI On Motherboard P3820-S
+ 270f 2001 ES1371, ES1373 AudioPCI On Motherboard 6CTR
+ 270f 2200 ES1371, ES1373 AudioPCI On Motherboard 6WTX
+ 270f 3000 ES1371, ES1373 AudioPCI On Motherboard 6WSV
+ 270f 3100 ES1371, ES1373 AudioPCI On Motherboard 6WIV2
+ 270f 3102 ES1371, ES1373 AudioPCI On Motherboard 6WIV
+ 270f 7060 ES1371, ES1373 AudioPCI On Motherboard 6ASA2
+ 8086 4249 ES1371, ES1373 AudioPCI On Motherboard BI440ZX
+ 8086 424c ES1371, ES1373 AudioPCI On Motherboard BL440ZX
+ 8086 425a ES1371, ES1373 AudioPCI On Motherboard BZ440ZX
+ 8086 4341 ES1371, ES1373 AudioPCI On Motherboard Cayman
+ 8086 4343 ES1371, ES1373 AudioPCI On Motherboard Cape Cod
+ 8086 4649 ES1371, ES1373 AudioPCI On Motherboard Fire Island
+ 8086 464a ES1371, ES1373 AudioPCI On Motherboard FJ440ZX
+ 8086 4d4f ES1371, ES1373 AudioPCI On Motherboard Montreal
+ 8086 4f43 ES1371, ES1373 AudioPCI On Motherboard OC440LX
+ 8086 5243 ES1371, ES1373 AudioPCI On Motherboard RC440BX
+ 8086 5352 ES1371, ES1373 AudioPCI On Motherboard SunRiver
+ 8086 5643 ES1371, ES1373 AudioPCI On Motherboard Vancouver
+ 8086 5753 ES1371, ES1373 AudioPCI On Motherboard WS440BX
+ 5000 ES1370 [AudioPCI]
+ 4942 4c4c Creative Sound Blaster AudioPCI128
+ 5880 5880 AudioPCI
+ 1274 2000 Creative Sound Blaster AudioPCI128
+ 1274 2003 Creative SoundBlaster AudioPCI 128
+ 1274 5880 Creative Sound Blaster AudioPCI128
+ 1458 a000 5880 AudioPCI On Motherboard 6OXET
+ 1462 6880 5880 AudioPCI On Motherboard MS-6188 1.00
+ 270f 2001 5880 AudioPCI On Motherboard 6CTR
+ 270f 2200 5880 AudioPCI On Motherboard 6WTX
+ 270f 7040 5880 AudioPCI On Motherboard 6ATA4
+1275 Network Appliance Corporation
+1276 Switched Network Technologies, Inc.
+1277 Comstream
+1278 Transtech Parallel Systems Ltd.
+ 0701 TPE3/TM3 PowerPC Node
+1279 Transmeta Corporation
+ 0295 Northbridge
+ 0395 LongRun Northbridge
+ 0396 SDRAM controller
+ 0397 BIOS scratchpad
+127a Rockwell International
+ 1002 HCF 56k Data/Fax Modem
+ 1092 094c SupraExpress 56i PRO [Diamond SUP2380]
+ 122d 4002 HPG / MDP3858-U
+ 122d 4005 MDP3858-E
+ 122d 4007 MDP3858-A/-NZ
+ 122d 4012 MDP3858-SA
+ 122d 4017 MDP3858-W
+ 122d 4018 MDP3858-W
+ 127a 1002 Rockwell 56K D/F HCF Modem
+ 1003 HCF 56k Data/Fax Modem
+ 0e11 b0bc 229-DF Zephyr
+ 0e11 b114 229-DF Cheetah
+ 1033 802b 229-DF
+ 13df 1003 PCI56RX Modem
+ 13e0 0117 IBM
+ 13e0 0147 IBM F-1156IV+/R3 Spain V.90 Modem
+ 13e0 0197 IBM
+ 13e0 01c7 IBM F-1156IV+/R3 WW V.90 Modem
+ 13e0 01f7 IBM
+ 1436 1003 IBM
+ 1436 1103 IBM 5614PM3G V.90 Modem
+ 1436 1602 Compaq 229-DF Ducati
+ 1004 HCF 56k Data/Fax/Voice Modem
+ 1048 1500 MicroLink 56k Modem
+ 10cf 1059 Fujitsu 229-DFRT
+ 1005 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 1033 8029 229-DFSV
+ 1033 8054 Modem
+ 10cf 103c Fujitsu
+ 10cf 1055 Fujitsu 229-DFSV
+ 10cf 1056 Fujitsu 229-DFSV
+ 122d 4003 MDP3858SP-U
+ 122d 4006 Packard Bell MDP3858V-E
+ 122d 4008 MDP3858SP-A/SP-NZ
+ 122d 4009 MDP3858SP-E
+ 122d 4010 MDP3858V-U
+ 122d 4011 MDP3858SP-SA
+ 122d 4013 MDP3858V-A/V-NZ
+ 122d 4015 MDP3858SP-W
+ 122d 4016 MDP3858V-W
+ 122d 4019 MDP3858V-SA
+ 13df 1005 PCI56RVP Modem
+ 13e0 0187 IBM
+ 13e0 01a7 IBM
+ 13e0 01b7 IBM DF-1156IV+/R3 Spain V.90 Modem
+ 13e0 01d7 IBM DF-1156IV+/R3 WW V.90 Modem
+ 1436 1005 IBM
+ 1436 1105 IBM
+ 1437 1105 IBM 5614PS3G V.90 Modem
+ 1022 HCF 56k Modem
+ 1436 1303 M3-5614PM3G V.90 Modem
+ 1023 HCF 56k Data/Fax Modem
+ 122d 4020 Packard Bell MDP3858-WE
+ 122d 4023 MDP3858-UE
+ 13e0 0247 IBM F-1156IV+/R6 Spain V.90 Modem
+ 13e0 0297 IBM
+ 13e0 02c7 IBM F-1156IV+/R6 WW V.90 Modem
+ 1436 1203 IBM
+ 1436 1303 IBM
+ 1024 HCF 56k Data/Fax/Voice Modem
+ 1025 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 10cf 106a Fujitsu 235-DFSV
+ 122d 4021 Packard Bell MDP3858V-WE
+ 122d 4022 MDP3858SP-WE
+ 122d 4024 MDP3858V-UE
+ 122d 4025 MDP3858SP-UE
+ 1026 HCF 56k PCI Speakerphone Modem
+ 1032 HCF 56k Modem
+ 1033 HCF 56k Modem
+ 1034 HCF 56k Modem
+ 1035 HCF 56k PCI Speakerphone Modem
+ 1036 HCF 56k Modem
+ 1085 HCF 56k Volcano PCI Modem
+ 2005 HCF 56k Data/Fax Modem
+ 104d 8044 229-DFSV
+ 104d 8045 229-DFSV
+ 104d 8055 PBE/Aztech 235W-DFSV
+ 104d 8056 235-DFSV
+ 104d 805a Modem
+ 104d 805f Modem
+ 104d 8074 Modem
+ 2013 HSF 56k Data/Fax Modem
+ 1179 0001 Modem
+ 1179 ff00 Modem
+ 2014 HSF 56k Data/Fax/Voice Modem
+ 10cf 1057 Fujitsu Citicorp III
+ 122d 4050 MSP3880-U
+ 122d 4055 MSP3880-W
+ 2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 10cf 1063 Fujitsu
+ 10cf 1064 Fujitsu
+ 1468 2015 Fujitsu
+ 2016 HSF 56k Data/Fax/Voice/Spkp Modem
+ 122d 4051 MSP3880V-W
+ 122d 4052 MSP3880SP-W
+ 122d 4054 MSP3880V-U
+ 122d 4056 MSP3880SP-U
+ 122d 4057 MSP3880SP-A
+ 4311 Riptide HSF 56k PCI Modem
+ 127a 4311 Ring Modular? Riptide HSF RT HP Dom
+ 13e0 0210 HP-GVC
+ 4320 Riptide PCI Audio Controller
+ 1235 4320 Riptide PCI Audio Controller
+ 4321 Riptide HCF 56k PCI Modem
+ 1235 4321 Hewlett Packard DF
+ 1235 4324 Hewlett Packard DF
+ 13e0 0210 Hewlett Packard DF
+ 144d 2321 Riptide
+ 4322 Riptide PCI Game Controller
+ 1235 4322 Riptide PCI Game Controller
+ 8234 RapidFire 616X ATM155 Adapter
+ 108d 0022 RapidFire 616X ATM155 Adapter
+ 108d 0027 RapidFire 616X ATM155 Adapter
+127b Pixera Corporation
+127c Crosspoint Solutions, Inc.
+127d Vela Research
+127e Winnov, L.P.
+127f Fujifilm
+1280 Photoscript Group Ltd.
+1281 Yokogawa Electric Corporation
+1282 Davicom Semiconductor, Inc.
+ 9009 Ethernet 100/10 MBit
+ 9100 Ethernet 100/10 MBit
+ 9102 Ethernet 100/10 MBit
+ 9132 Ethernet 100/10 MBit
+1283 Integrated Technology Express, Inc.
+ 673a IT8330G
+ 8330 IT8330G
+ 8888 IT8888F PCI to ISA Bridge with SMB
+ 8889 IT8889F PCI to ISA Bridge
+ e886 IT8330G
+1284 Sahara Networks, Inc.
+1285 Platform Technologies, Inc.
+ 0100 AGOGO sound chip (aka ESS Maestro 1)
+1286 Mazet GmbH
+1287 M-Pact, Inc.
+ 001e LS220D DVD Decoder
+ 001f LS220C DVD Decoder
+1288 Timestep Corporation
+1289 AVC Technology, Inc.
+128a Asante Technologies, Inc.
+128b Transwitch Corporation
+128c Retix Corporation
+128d G2 Networks, Inc.
+ 0021 ATM155 Adapter
+128e Hoontech Corporation/Samho Multi Tech Ltd.
+ 0008 ST128 WSS/SB
+ 0009 ST128 SAM9407
+ 000a ST128 Game Port
+ 000b ST128 MPU Port
+ 000c ST128 Ctrl Port
+128f Tateno Dennou, Inc.
+1290 Sord Computer Corporation
+1291 NCS Computer Italia
+1292 Tritech Microelectronics Inc
+1293 Media Reality Technology
+1294 Rhetorex, Inc.
+1295 Imagenation Corporation
+1296 Kofax Image Products
+1297 Holco Enterprise Co, Ltd/Shuttle Computer
+1298 Spellcaster Telecommunications Inc.
+1299 Knowledge Technology Lab.
+129a VMetro, inc.
+ 0615 PBT-615 PCI-X Bus Analyzer
+129b Image Access
+129c Jaycor
+129d Compcore Multimedia, Inc.
+129e Victor Company of Japan, Ltd.
+129f OEC Medical Systems, Inc.
+12a0 Allen-Bradley Company
+12a1 Simpact Associates, Inc.
+12a2 Newgen Systems Corporation
+12a3 Lucent Technologies
+12a4 NTT Electronics Technology Company
+12a5 Vision Dynamics Ltd.
+12a6 Scalable Networks, Inc.
+12a7 AMO GmbH
+12a8 News Datacom
+12a9 Xiotech Corporation
+12aa SDL Communications, Inc.
+12ab Yuan Yuan Enterprise Co., Ltd.
+ 3000 MPG-200C PCI DVD Decoder Card
+12ac Measurex Corporation
+12ad Multidata GmbH
+12ae Alteon Networks Inc.
+ 0001 AceNIC Gigabit Ethernet
+ 12ae 0001 Gigabit Ethernet-SX (Universal)
+ 1410 0104 Gigabit Ethernet-SX PCI Adapter
+ 0002 AceNIC Gigabit Ethernet (Copper)
+ 12ae 0002 Gigabit Ethernet-T (3C986-T)
+12af TDK USA Corp
+12b0 Jorge Scientific Corp
+12b1 GammaLink
+12b2 General Signal Networks
+12b3 Inter-Face Co Ltd
+12b4 FutureTel Inc
+12b5 Granite Systems Inc.
+12b6 Natural Microsystems
+12b7 Cognex Modular Vision Systems Div. - Acumen Inc.
+12b8 Korg
+12b9 US Robotics/3Com
+ 1006 WinModem
+ 12b9 005c USR 56k Internal Voice WinModem (Model 3472)
+ 12b9 005e USR 56k Internal WinModem (Model 662975)
+ 12b9 0062 USR 56k Internal Voice WinModem (Model 662978)
+ 12b9 0068 USR 56k Internal Voice WinModem (Model 5690)
+ 12b9 007a USR 56k Internal Voice WinModem (Model 662974)
+ 12b9 007f USR 56k Internal WinModem (Models 5698, 5699)
+ 12b9 0080 USR 56k Internal WinModem (Models 2975, 3528)
+ 12b9 0081 USR 56k Internal Voice WinModem (Models 2974, 3529)
+ 12b9 0091 USR 56k Internal Voice WinModem (Model 2978)
+ 1007 USR 56k Internal WinModem
+ 12b9 00a3 USR 56k Internal WinModem (Model 3595)
+ 1008 56K FaxModem Model 5610
+ 12b9 00a2 USR 56k Internal FAX Modem (Model 2977)
+ 12b9 00aa USR 56k Internal Voice Modem (Model 2976)
+ 12b9 00ab USR 56k Internal Voice Modem (Model 5609)
+ 12b9 00ac USR 56k Internal Voice Modem (Model 3298)
+ 12b9 00ad USR 56k Internal FAX Modem (Model 5610)
+12ba PMC Sierra
+12bb Nippon Unisoft Corporation
+12bc Array Microsystems
+12bd Computerm Corp.
+12be Anchor Chips Inc.
+ 3041 AN3041Q CO-MEM
+ 3042 AN3042Q CO-MEM Lite
+ 12be 3042 Anchor Chips Lite Evaluation Board
+12bf Fujifilm Microdevices
+12c0 Infimed
+12c1 GMM Research Corp
+12c2 Mentec Limited
+12c3 Holtek Microelectronics Inc
+ 0058 PCI NE2K Ethernet
+ 5598 PCI NE2K Ethernet
+12c4 Connect Tech Inc
+12c5 Picture Elements Incorporated
+ 007e Imaging/Scanning Subsystem Engine
+ 007f Imaging/Scanning Subsystem Engine
+ 0081 PCIVST [Grayscale Thresholding Engine]
+ 0085 Video Simulator/Sender
+ 0086 THR2 Multi-scale Thresholder
+12c6 Mitani Corporation
+12c7 Dialogic Corp
+12c8 G Force Co, Ltd
+12c9 Gigi Operations
+12ca Integrated Computing Engines
+12cb Antex Electronics Corporation
+12cc Pluto Technologies International
+12cd Aims Lab
+12ce Netspeed Inc.
+12cf Prophet Systems, Inc.
+12d0 GDE Systems, Inc.
+12d1 PSITech
+12d2 NVidia / SGS Thomson (Joint Venture)
+ 0008 NV1
+ 0009 DAC64
+ 0018 Riva128
+ 1048 0c10 VICTORY Erazor
+ 107b 8030 STB Velocity 128
+ 1092 0350 Viper V330
+ 1092 1092 Viper V330
+ 10b4 1b1b STB Velocity 128
+ 10b4 1b1d STB Velocity 128
+ 10b4 1b1e STB Velocity 128, PAL TV-Out
+ 10b4 1b20 STB Velocity 128 Sapphire
+ 10b4 1b21 STB Velocity 128
+ 10b4 1b22 STB Velocity 128 AGP, NTSC TV-Out
+ 10b4 1b23 STB Velocity 128 AGP, PAL TV-Out
+ 10b4 1b27 STB Velocity 128 DVD
+ 10b4 1b88 MVP Pro 128
+ 10b4 222a STB Velocity 128 AGP
+ 10b4 2230 STB Velocity 128
+ 10b4 2232 STB Velocity 128
+ 10b4 2235 STB Velocity 128 AGP
+ 2a15 54a3 3DVision-SAGP / 3DexPlorer 3000
+ 0019 Riva128ZX
+ 0020 TNT
+ 0028 TNT2
+ 0029 UTNT2
+ 002c VTNT2
+ 00a0 ITNT2
+12d3 Vingmed Sound A/S
+12d4 Ulticom (Formerly DGM&S)
+12d5 Equator Technologies
+12d6 Analogic Corp
+12d7 Biotronic SRL
+12d8 Pericom Semiconductor
+12d9 Aculab PLC
+12da True Time Inc.
+12db Annapolis Micro Systems, Inc
+12dc Symicron Computer Communication Ltd.
+12dd Management Graphics
+12de Rainbow Technologies
+12df SBS Technologies Inc
+12e0 Chase Research
+ 0010 ST16C654 Quad UART
+ 0020 ST16C654 Quad UART
+ 0030 ST16C654 Quad UART
+12e1 Nintendo Co, Ltd
+12e2 Datum Inc. Bancomm-Timing Division
+12e3 Imation Corp - Medical Imaging Systems
+12e4 Brooktrout Technology Inc
+12e5 Apex Semiconductor Inc
+12e6 Cirel Systems
+12e7 Sunsgroup Corporation
+12e8 Crisc Corp
+12e9 GE Spacenet
+12ea Zuken
+12eb Aureal Semiconductor
+ 0001 Vortex 1
+ 104d 8036 AU8820 Vortex Digital Audio Processor
+ 1092 2000 Sonic Impact A3D
+ 1092 2100 Sonic Impact A3D
+ 1092 2110 Sonic Impact A3D
+ 1092 2200 Sonic Impact A3D
+ 122d 1002 AU8820 Vortex Digital Audio Processor
+ 12eb 0001 AU8820 Vortex Digital Audio Processor
+ 5053 3355 Montego
+ 0002 Vortex 2
+ 104d 8049 AU8830 Vortex 3D Digital Audio Processor
+ 104d 807b AU8830 Vortex 3D Digital Audio Processor
+ 1092 3000 Monster Sound II
+ 1092 3001 Monster Sound II
+ 1092 3002 Monster Sound II
+ 1092 3003 Monster Sound II
+ 1092 3004 Monster Sound II
+ 12eb 0001 AU8830 Vortex 3D Digital Audio Processor
+ 12eb 0002 AU8830 Vortex 3D Digital Audio Processor
+ 12eb 0088 AU8830 Vortex 3D Digital Audio Processor
+ 144d 3510 AU8830 Vortex 3D Digital Audio Processor
+ 5053 3356 Montego II
+ 0003 AU8810 Vortex Digital Audio Processor
+ 104d 8049 AU8810 Vortex Digital Audio Processor
+ 104d 8077 AU8810 Vortex Digital Audio Processor
+ 109f 1000 AU8810 Vortex Digital Audio Processor
+ 12eb 0003 AU8810 Vortex Digital Audio Processor
+ 1462 6780 AU8810 Vortex Digital Audio Processor
+ 14a4 2073 AU8810 Vortex Digital Audio Processor
+ 14a4 2091 AU8810 Vortex Digital Audio Processor
+ 14a4 2104 AU8810 Vortex Digital Audio Processor
+ 14a4 2106 AU8810 Vortex Digital Audio Processor
+ 8803 Vortex 56k Software Modem
+ 12eb 8803 Vortex 56k Software Modem
+12ec 3A International, Inc.
+12ed Optivision Inc.
+12ee Orange Micro
+12ef Vienna Systems
+12f0 Pentek
+12f1 Sorenson Vision Inc
+12f2 Gammagraphx, Inc.
+12f3 Radstone Technology
+12f4 Megatel
+12f5 Forks
+12f6 Dawson France
+12f7 Cognex
+12f8 Electronic Design GmbH
+ 0002 VideoMaker
+12f9 Four Fold Ltd
+12fb Spectrum Signal Processing
+12fc Capital Equipment Corp
+12fd I2S
+12fe ESD Electronic System Design GmbH
+12ff Lexicon
+1300 Harman International Industries Inc
+1302 Computer Sciences Corp
+1303 Innovative Integration
+1304 Juniper Networks
+1305 Netphone, Inc
+1306 Duet Technologies
+1307 Computer Boards
+ 0001 PCI-DAS1602/16
+ 000b PCI-DIO48H
+ 000c PCI-PDISO8
+ 000d PCI-PDISO16
+ 000f PCI-DAS1200
+ 0010 PCI-DAS1602/12
+ 0014 PCI-DIO24H
+ 0015 PCI-DIO24H/CTR3
+ 0016 PCI-DIO48H/CTR15
+ 0017 PCI-DIO96H
+ 0018 PCI-CTR05
+ 0019 PCI-DAS1200/JR
+ 001a PCI-DAS1001
+ 001b PCI-DAS1002
+ 001c PCI-DAS1602JR/16
+ 001d PCI-DAS6402/16
+ 001e PCI-DAS6402/12
+ 001f PCI-DAS16/M1
+ 0020 PCI-DDA02/12
+ 0021 PCI-DDA04/12
+ 0022 PCI-DDA08/12
+ 0023 PCI-DDA02/16
+ 0024 PCI-DDA04/16
+ 0025 PCI-DDA08/16
+ 0026 PCI-DAC04/12-HS
+ 0027 PCI-DAC04/16-HS
+ 0028 PCI-DIO24
+ 0029 PCI-DAS08
+ 002c PCI-INT32
+ 0033 PCI-DUAL-AC5
+ 0034 PCI-DAS-TC
+ 0035 PCI-DAS64/M1/16
+ 0036 PCI-DAS64/M2/16
+ 0037 PCI-DAS64/M3/16
+ 004c PCI-DAS1000
+1308 Jato Technologies Inc.
+ 0001 NetCelerator Adapter
+ 1308 0001 NetCelerator Adapter
+1309 AB Semiconductor Ltd
+130a Mitsubishi Electric Microcomputer
+130b Colorgraphic Communications Corp
+130c Ambex Technologies, Inc
+130d Accelerix Inc
+130e Yamatake-Honeywell Co. Ltd
+130f Advanet Inc
+1310 Gespac
+1311 Videoserver, Inc
+1312 Acuity Imaging, Inc
+1313 Yaskawa Electric Co.
+1316 Teradyne Inc
+1317 Linksys
+ 0981 Fast Ethernet 10/100
+ 0985 Network Everywhere Fast Ethernet 10/100 model NC100
+ 1985 Fast Ethernet 10/100
+1318 Packet Engines Inc.
+ 0911 PCI Ethernet Adapter
+1319 Fortemedia, Inc
+ 0801 Xwave QS3000A [FM801]
+ 0802 Xwave QS3000A [FM801 game port]
+ 1000 FM801 PCI Audio
+ 1001 FM801 PCI Joystick
+131a Finisar Corp.
+131c Nippon Electro-Sensory Devices Corp
+131d Sysmic, Inc.
+131e Xinex Networks Inc
+131f Siig Inc
+ 1000 CyberSerial (1-port) 16550
+ 1001 CyberSerial (1-port) 16650
+ 1002 CyberSerial (1-port) 16850
+ 1010 Duet 1S(16550)+1P
+ 1011 Duet 1S(16650)+1P
+ 1012 Duet 1S(16850)+1P
+ 1020 CyberParallel (1-port)
+ 1021 CyberParallel (2-port)
+ 1030 CyberSerial (2-port) 16550
+ 1031 CyberSerial (2-port) 16650
+ 1032 CyberSerial (2-port) 16850
+ 1034 Trio 2S(16550)+1P
+ 1035 Trio 2S(16650)+1P
+ 1036 Trio 2S(16850)+1P
+ 1050 CyberSerial (4-port) 16550
+ 1051 CyberSerial (4-port) 16650
+ 1052 CyberSerial (4-port) 16850
+ 2000 CyberSerial (1-port) 16550
+ 2001 CyberSerial (1-port) 16650
+ 2002 CyberSerial (1-port) 16850
+ 2010 Duet 1S(16550)+1P
+ 2011 Duet 1S(16650)+1P
+ 2012 Duet 1S(16850)+1P
+ 2020 CyberParallel (1-port)
+ 2021 CyberParallel (2-port)
+ 2030 CyberSerial (2-port) 16550
+ 131f 2030 PCI Serial Card
+ 2031 CyberSerial (2-port) 16650
+ 2032 CyberSerial (2-port) 16850
+ 2040 Trio 1S(16550)+2P
+ 2041 Trio 1S(16650)+2P
+ 2042 Trio 1S(16850)+2P
+ 2050 CyberSerial (4-port) 16550
+ 2051 CyberSerial (4-port) 16650
+ 2052 CyberSerial (4-port) 16850
+ 2060 Trio 2S(16550)+1P
+ 2061 Trio 2S(16650)+1P
+ 2062 Trio 2S(16850)+1P
+1320 Crypto AG
+1321 Arcobel Graphics BV
+1322 MTT Co., Ltd
+1323 Dome Inc
+1324 Sphere Communications
+1325 Salix Technologies, Inc
+1326 Seachange international
+1327 Voss scientific
+1328 quadrant international
+1329 Productivity Enhancement
+132a Microcom Inc.
+132b Broadband Technologies
+132c Micrel Inc
+132d Integrated Silicon Solution, Inc.
+1330 MMC Networks
+1331 Radisys Corp.
+1332 Micro Memory
+1334 Redcreek Communications, Inc
+1335 Videomail, Inc
+1337 Third Planet Publishing
+1338 BT Electronics
+133a Vtel Corp
+133b Softcom Microsystems
+133c Holontech Corp
+133d SS Technologies
+133e Virtual Computer Corp
+133f SCM Microsystems
+1340 Atalla Corp
+1341 Kyoto Microcomputer Co
+1342 Promax Systems Inc
+1343 Phylon Communications Inc
+1344 Crucial Technology
+1345 Arescom Inc
+1347 Odetics
+1349 Sumitomo Electric Industries, Ltd.
+134a DTC Technology Corp.
+ 0001 Domex 536
+ 0002 Domex DMX3194UP SCSI Adapter
+134b ARK Research Corp.
+134c Chori Joho System Co. Ltd
+134d PCTel Inc
+ 7890 HSP MicroModem 56
+ 7891 HSP MicroModem 56
+ 134d 0001 HSP MicroModem 56
+ 7892 HSP MicroModem 56
+ 7893 HSP MicroModem 56
+ 7894 HSP MicroModem 56
+ 7895 HSP MicroModem 56
+ 7896 HSP MicroModem 56
+ 7897 HSP MicroModem 56
+134e CSTI
+134f Algo System Co Ltd
+1350 Systec Co. Ltd
+1351 Sonix Inc
+1353 Dassault A.T.
+1354 Dwave System Inc
+1355 Kratos Analytical Ltd
+1356 The Logical Co
+1359 Prisa Networks
+135a Brain Boxes
+135b Giganet Inc
+135c Quatech Inc
+ 00f0 MPAC-100 Synchronous Serial Card (Zilog 85230)
+135d ABB Network Partner AB
+135e Sealevel Systems Inc
+ 7101 Single Port RS-232/422/485/530
+ 7201 Dual Port RS-232/422/485 Interface
+ 7202 Dual Port RS-232 Interface
+ 7401 Four Port RS-232 Interface
+ 7402 Four Port RS-422/485 Interface
+ 7801 Eight Port RS-232 Interface
+ 8001 8001 Digital I/O Adapter
+135f I-Data International A-S
+1360 Meinberg Funkuhren
+1361 Soliton Systems K.K.
+1362 Fujifacom Corporation
+1363 Phoenix Technology Ltd
+1364 ATM Communications Inc
+1365 Hypercope GmbH
+1366 Teijin Seiki Co. Ltd
+1367 Hitachi Zosen Corporation
+1368 Skyware Corporation
+1369 Digigram
+136a High Soft Tech
+136b Kawasaki Steel Corporation
+136c Adtek System Science Co Ltd
+136d Gigalabs Inc
+136f Applied Magic Inc
+1370 ATL Products
+1371 CNet Technology Inc
+1373 Silicon Vision Inc
+1374 Silicom Ltd
+1375 Argosystems Inc
+1376 LMC
+1377 Electronic Equipment Production & Distribution GmbH
+1378 Telemann Co. Ltd
+1379 Asahi Kasei Microsystems Co Ltd
+137a Mark of the Unicorn Inc
+137b PPT Vision
+137c Iwatsu Electric Co Ltd
+137d Dynachip Corporation
+137e Patriot Scientific Corporation
+137f Japan Satellite Systems Inc
+1380 Sanritz Automation Co Ltd
+1381 Brains Co. Ltd
+1382 Marian - Electronic & Software
+1383 Controlnet Inc
+1384 Reality Simulation Systems Inc
+1385 Netgear
+ 4100 802.11b Wireless Adapter (MA301)
+ 620a GA620
+ 622a GA622
+ 630a GA630
+ f311 FA311
+1386 Video Domain Technologies
+1387 Systran Corp
+1388 Hitachi Information Technology Co Ltd
+1389 Applicom International
+ 0001 PCI1500PFB [Intelligent fieldbus adaptor]
+138a Fusion Micromedia Corp
+138b Tokimec Inc
+138c Silicon Reality
+138d Future Techno Designs pte Ltd
+138e Basler GmbH
+138f Patapsco Designs Inc
+1390 Concept Development Inc
+1391 Development Concepts Inc
+1392 Medialight Inc
+1393 Moxa Technologies Co Ltd
+ 1040 Smartio C104H/PCI
+ 1680 Smartio C168H/PCI
+ 2040 Intellio CP-204J
+ 2180 Intellio C218 Turbo PCI
+ 3200 Intellio C320 Turbo PCI
+1394 Level One Communications
+ 0001 LXT1001 Gigabit Ethernet
+ 1394 0001 NetCelerator Adapter
+1395 Ambicom Inc
+1396 Cipher Systems Inc
+1397 Cologne Chip Designs GmbH
+ 2bd0 ISDN network controller [HFC-PCI]
+ 1397 2bd0 ISDN Board
+ e4bf 1000 CI1-1-Harp
+1398 Clarion co. Ltd
+1399 Rios systems Co Ltd
+139a Alacritech Inc
+ 0001 Quad Port 10/100 Server Accelerator
+ 0003 Single Port 10/100 Server Accelerator
+ 0005 Single Port Gigabit Server Accelerator
+139b Mediasonic Multimedia Systems Ltd
+139c Quantum 3d Inc
+139d EPL limited
+139e Media4
+139f Aethra s.r.l.
+13a0 Crystal Group Inc
+13a1 Kawasaki Heavy Industries Ltd
+13a2 Ositech Communications Inc
+13a3 Hifn Inc.
+ 0005 7751 Security Processor
+ 0006 6500 Public Key Processor
+ 0007 7811 Security Processor
+ 0012 7951 Security Processor
+13a4 Rascom Inc
+13a5 Audio Digital Imaging Inc
+13a6 Videonics Inc
+13a7 Teles AG
+13a8 Exar Corp.
+ 0158 XR17C158 Octal UART
+13a9 Siemens Medical Systems, Ultrasound Group
+13aa Broadband Networks Inc
+13ab Arcom Control Systems Ltd
+13ac Motion Media Technology Ltd
+13ad Nexus Inc
+13ae ALD Technology Ltd
+13af T.Sqware
+13b0 Maxspeed Corp
+13b1 Tamura corporation
+13b2 Techno Chips Co. Ltd
+13b3 Lanart Corporation
+13b4 Wellbean Co Inc
+13b5 ARM
+13b6 Dlog GmbH
+13b7 Logic Devices Inc
+13b8 Nokia Telecommunications oy
+13b9 Elecom Co Ltd
+13ba Oxford Instruments
+13bb Sanyo Technosound Co Ltd
+13bc Bitran Corporation
+13bd Sharp corporation
+13be Miroku Jyoho Service Co. Ltd
+13bf Sharewave Inc
+13c0 Microgate Corporation
+ 0010 SyncLink WAN Adapter
+13c1 3ware Inc
+ 1000 3ware ATA-RAID
+ 1001 3ware 7000-series ATA-RAID
+ 1002 3ware ATA-RAID
+13c2 Technotrend Systemtechnik GmbH
+13c3 Janz Computer AG
+13c4 Phase Metrics
+13c5 Alphi Technology Corp
+13c6 Condor Engineering Inc
+13c7 Blue Chip Technology Ltd
+13c8 Apptech Inc
+13c9 Eaton Corporation
+13ca Iomega Corporation
+13cb Yano Electric Co Ltd
+13cc Metheus Corporation
+13cd Compatible Systems Corporation
+13ce Cocom A/S
+13cf Studio Audio & Video Ltd
+13d0 Techsan Electronics Co Ltd
+13d1 Abocom Systems Inc
+ ab06 RTL8139 [FE2000VX] CardBus Fast Ethernet Attached Port Adapter
+13d2 Shark Multimedia Inc
+13d3 IMC Networks
+13d4 Graphics Microsystems Inc
+13d5 Media 100 Inc
+13d6 K.I. Technology Co Ltd
+13d7 Toshiba Engineering Corporation
+13d8 Phobos corporation
+13d9 Apex PC Solutions Inc
+13da Intresource Systems pte Ltd
+13db Janich & Klass Computertechnik GmbH
+13dc Netboost Corporation
+13dd Multimedia Bundle Inc
+13de ABB Robotics Products AB
+13df E-Tech Inc
+ 0001 PCI56RVP Modem
+ 13df 0001 PCI56RVP Modem
+13e0 GVC Corporation
+13e1 Silicom Multimedia Systems Inc
+13e2 Dynamics Research Corporation
+13e3 Nest Inc
+13e4 Calculex Inc
+13e5 Telesoft Design Ltd
+13e6 Argosy research Inc
+13e7 NAC Incorporated
+13e8 Chip Express Corporation
+13e9 Chip Express Corporation
+13ea Dallas Semiconductor
+13eb Hauppauge Computer Works Inc
+13ec Zydacron Inc
+13ed Raytheon E-Systems
+13ee Hayes Microcomputer Products Inc
+13ef Coppercom Inc
+13f0 Sundance Technology Inc
+ 0201 ST201 Sundance Ethernet
+13f1 Oce' - Technologies B.V.
+13f2 Ford Microelectronics Inc
+13f3 Mcdata Corporation
+13f4 Troika Networks, Inc.
+ 1401 Zentai Fibre Channel Adapter
+13f5 Kansai Electric Co. Ltd
+13f6 C-Media Electronics Inc
+ 0100 CM8338A
+ 13f6 ffff CMI8338/C3DX PCI Audio Device
+ 0101 CM8338B
+ 13f6 0101 CMI8338-031 PCI Audio Device
+ 0111 CM8738
+ 1043 8077 CMI8738 6-channel audio controller
+ 1043 80e2 CMI8738 6ch-MX
+ 13f6 0111 CMI8738/C3DX PCI Audio Device
+ 0211 CM8738
+13f7 Wildfire Communications
+13f8 Ad Lib Multimedia Inc
+13f9 NTT Advanced Technology Corp.
+13fa Pentland Systems Ltd
+13fb Aydin Corp
+13fc Computer Peripherals International
+13fd Micro Science Inc
+13fe Advantech Co. Ltd
+ 1756 PCI-1756
+13ff Silicon Spice Inc
+1400 Artx Inc
+ 1401 9432 TX
+1401 CR-Systems A/S
+1402 Meilhaus Electronic GmbH
+1403 Ascor Inc
+1404 Fundamental Software Inc
+1405 Excalibur Systems Inc
+1406 Oce' Printing Systems GmbH
+1407 Lava Computer mfg Inc
+ 0100 Lava Dual Serial
+ 0101 Lava Quatro A
+ 0102 Lava Quatro B
+ 0200 Lava Port Plus
+ 0201 Lava Quad A
+ 0202 Lava Quad B
+ 0500 Lava Single Serial
+ 0600 Lava Port 650
+ 8000 Lava Parallel
+ 8001 Dual parallel port controller A
+ 8002 Lava Dual Parallel port A
+ 8003 Lava Dual Parallel port B
+ 8800 BOCA Research IOPPAR
+1408 Aloka Co. Ltd
+1409 Timedia Technology Co Ltd
+ 7168 PCI2S550 (Dual 16550 UART)
+140a DSP Research Inc
+140b Ramix Inc
+140c Elmic Systems Inc
+140d Matsushita Electric Works Ltd
+140e Goepel Electronic GmbH
+140f Salient Systems Corp
+1410 Midas lab Inc
+1411 Ikos Systems Inc
+1412 IC Ensemble Inc
+ 1712 ICE1712 [Envy24]
+1413 Addonics
+1414 Microsoft Corporation
+1415 Oxford Semiconductor Ltd
+ 8403 VScom 011H-EP1 1 port parallel adaptor
+ 9501 OX16PCI954 (Quad 16950 UART) function 0
+ 15ed 2000 MCCR Serial p0-3 of 8
+ 15ed 2001 MCCR Serial p0-3 of 16
+ 950a EXSYS EX-41092 Dual 16950 Serial adapter
+ 950b OXCB950 Cardbus 16950 UART
+ 9511 OX16PCI954 (Quad 16950 UART) function 1
+ 15ed 2000 MCCR Serial p4-7 of 8
+ 15ed 2001 MCCR Serial p4-15 of 16
+ 9521 OX16PCI952 (Dual 16950 UART)
+1416 Multiwave Innovation pte Ltd
+1417 Convergenet Technologies Inc
+1418 Kyushu electronics systems Inc
+1419 Excel Switching Corp
+141a Apache Micro Peripherals Inc
+141b Zoom Telephonics Inc
+141d Digitan Systems Inc
+141e Fanuc Ltd
+141f Visiontech Ltd
+1420 Psion Dacom plc
+1421 Ads Technologies Inc
+1422 Ygrec Systems Co Ltd
+1423 Custom Technology Corp.
+1424 Videoserver Connections
+1425 ASIC Designers Inc
+1426 Storage Technology Corp.
+1427 Better On-Line Solutions
+1428 Edec Co Ltd
+1429 Unex Technology Corp.
+142a Kingmax Technology Inc
+142b Radiolan
+142c Minton Optic Industry Co Ltd
+142d Pix stream Inc
+142e Vitec Multimedia
+142f Radicom Research Inc
+1430 ITT Aerospace/Communications Division
+1431 Gilat Satellite Networks
+1432 Edimax Computer Co.
+1433 Eltec Elektronik GmbH
+1435 Real Time Devices US Inc.
+1436 CIS Technology Inc
+1437 Nissin Inc Co
+1438 Atmel-dream
+1439 Outsource Engineering & Mfg. Inc
+143a Stargate Solutions Inc
+143b Canon Research Center, America
+143c Amlogic Inc
+143d Tamarack Microelectronics Inc
+143e Jones Futurex Inc
+143f Lightwell Co Ltd - Zax Division
+1440 ALGOL Corp.
+1441 AGIE Ltd
+1442 Phoenix Contact GmbH & Co.
+1443 Unibrain S.A.
+1444 TRW
+1445 Logical DO Ltd
+1446 Graphin Co Ltd
+1447 AIM GmbH
+1448 Alesis Studio Electronics
+1449 TUT Systems Inc
+144a Adlink Technology
+ 7296 PCI-7296
+ 7432 PCI-7432
+ 7433 PCI-7433
+ 7434 PCI-7434
+ 7841 PCI-7841
+ 8133 PCI-8133
+ 8554 PCI-8554
+ 9111 PCI-9111
+ 9113 PCI-9113
+ 9114 PCI-9114
+144b Loronix Information Systems Inc
+144c Catalina Research Inc
+144d Samsung Electronics Co Ltd
+144e OLITEC
+144f Askey Computer Corp.
+1450 Octave Communications Ind.
+1451 SP3D Chip Design GmbH
+1453 MYCOM Inc
+1454 Altiga Networks
+1455 Logic Plus Plus Inc
+1456 Advanced Hardware Architectures
+1457 Nuera Communications Inc
+1458 Giga-byte Technology
+1459 DOOIN Electronics
+145a Escalate Networks Inc
+145b PRAIM SRL
+145c Cryptek
+145d Gallant Computer Inc
+145e Aashima Technology B.V.
+145f Baldor Electric Company
+ 0001 NextMove PCI
+1460 DYNARC INC
+1461 Avermedia Technologies Inc
+1462 Micro-star International Co Ltd
+1463 Fast Corporation
+1464 Interactive Circuits & Systems Ltd
+1465 GN NETTEST Telecom DIV.
+1466 Designpro Inc.
+1467 DIGICOM SPA
+1468 AMBIT Microsystem Corp.
+1469 Cleveland Motion Controls
+146a IFR
+146b Parascan Technologies Ltd
+146c Ruby Tech Corp.
+146d Tachyon, INC.
+146e Williams Electronics Games, Inc.
+146f Multi Dimensional Consulting Inc
+1470 Bay Networks
+1471 Integrated Telecom Express Inc
+1472 DAIKIN Industries, Ltd
+1473 ZAPEX Technologies Inc
+1474 Doug Carson & Associates
+1475 PICAZO Communications
+1476 MORTARA Instrument Inc
+1477 Net Insight
+1478 DIATREND Corporation
+1479 TORAY Industries Inc
+147a FORMOSA Industrial Computing
+147b ABIT Computer Corp.
+147c AWARE, Inc.
+147d Interworks Computer Products
+147e Matsushita Graphic Communication Systems, Inc.
+147f NIHON UNISYS, Ltd.
+1480 SCII Telecom
+1481 BIOPAC Systems Inc
+1482 ISYTEC - Integrierte Systemtechnik GmbH
+1483 LABWAY Corporation
+1484 Logic Corporation
+1485 ERMA - Electronic GmbH
+1486 L3 Communications Telemetry & Instrumentation
+1487 MARQUETTE Medical Systems
+1488 KONTRON Electronik GmbH
+1489 KYE Systems Corporation
+148a OPTO
+148b INNOMEDIALOGIC Inc.
+148c C.P. Technology Co. Ltd
+148d DIGICOM Systems, Inc.
+ 1003 HCF 56k Data/Fax Modem
+148e OSI Plus Corporation
+148f Plant Equipment, Inc.
+1490 Stone Microsystems PTY Ltd.
+1491 ZEAL Corporation
+1492 Time Logic Corporation
+1493 MAKER Communications
+1494 WINTOP Technology, Inc.
+1495 TOKAI Communications Industry Co. Ltd
+1496 JOYTECH Computer Co., Ltd.
+1497 SMA Regelsysteme GmbH
+1498 TEWS Datentechnik GmbH
+1499 EMTEC CO., Ltd
+149a ANDOR Technology Ltd
+149b SEIKO Instruments Inc
+149c OVISLINK Corp.
+149d NEWTEK Inc
+149e Mapletree Networks Inc.
+149f LECTRON Co Ltd
+14a0 SOFTING GmbH
+14a1 Systembase Co Ltd
+14a2 Millennium Engineering Inc
+14a3 Maverick Networks
+14a4 GVC/BCM Advanced Research
+14a5 XIONICS Document Technologies Inc
+14a6 INOVA Computers GmbH & Co KG
+14a7 MYTHOS Systems Inc
+14a8 FEATRON Technologies Corporation
+14a9 HIVERTEC Inc
+14aa Advanced MOS Technology Inc
+14ab Mentor Graphics Corp.
+14ac Novaweb Technologies Inc
+14ad Time Space Radio AB
+14ae CTI, Inc
+14af Guillemot Corporation
+14b0 BST Communication Technology Ltd
+14b1 Nextcom K.K.
+14b2 ENNOVATE Networks Inc
+14b3 XPEED Inc
+ 0000 DSL NIC
+14b4 PHILIPS Business Electronics B.V.
+14b5 Creamware GmbH
+14b6 Quantum Data Corp.
+14b7 PROXIM Inc
+ 0001 Symphony 4110
+14b8 Techsoft Technology Co Ltd
+14b9 AIRONET Wireless Communications
+ 0001 PC4800
+ 0340 PC4800
+ 0350 PC4800
+ 4500 PC4500
+ 4800 PC4800
+14ba INTERNIX Inc.
+14bb SEMTECH Corporation
+14bc Globespan Semiconductor Inc.
+14bd CARDIO Control N.V.
+14be L3 Communications
+14bf SPIDER Communications Inc.
+14c0 COMPAL Electronics Inc
+14c1 MYRICOM Inc.
+14c2 DTK Computer
+14c3 MEDIATEK Corp.
+14c4 IWASAKI Information Systems Co Ltd
+14c5 Automation Products AB
+14c6 Data Race Inc
+14c7 Modular Technology Holdings Ltd
+14c8 Turbocomm Tech. Inc.
+14c9 ODIN Telesystems Inc
+14ca PE Logic Corp.
+14cb Billionton Systems Inc
+14cc NAKAYO Telecommunications Inc
+14cd Universal Scientific Ind.
+14ce Whistle Communications
+14cf TEK Microsystems Inc.
+14d0 Ericsson Axe R & D
+14d1 Computer Hi-Tech Co Ltd
+14d2 Titan Electronics Inc
+ 8001 VScom 010L 1 port parallel adaptor
+ 8002 VScom 020L 2 port parallel adaptor
+ 8010 VScom 100L 1 port serial adaptor
+ 8011 VScom 110L 1 port serial and 1 port parallel adaptor
+ 8020 VScom 200L 1 port serial adaptor
+ 8021 VScom 210L 2 port serial and 1 port parallel adaptor
+ 8040 VScom 400L 4 port serial adaptor
+ 8080 VScom 800L 8 port serial adaptor
+ a000 VScom 010H 1 port parallel adaptor
+ a001 VScom 100H 1 port serial adaptor
+ a003 VScom 400H 4 port serial adaptor
+ a004 VScom 400HF1 4 port serial adaptor
+ a005 VScom 200H 2 port serial adaptor
+ e001 VScom 010HV2 1 port parallel adaptor
+ e010 VScom 100HV2 1 port serial adaptor
+ e020 VScom 200HV2 2 port serial adaptor
+14d3 CIRTECH (UK) Ltd
+14d4 Panacom Technology Corp
+14d5 Nitsuko Corporation
+14d6 Accusys Inc
+14d7 Hirakawa Hewtech Corp
+14d8 HOPF Elektronik GmbH
+14d9 Alpha Processor Inc
+14da National Aerospace Laboratories
+14db AFAVLAB Technology Inc
+ 2120 TK9902
+14dc Amplicon Liveline Ltd
+ 0000 PCI230
+ 0001 PCI242
+ 0002 PCI244
+ 0003 PCI247
+ 0004 PCI248
+ 0005 PCI249
+ 0006 PCI260
+ 0007 PCI224
+ 0008 PCI234
+ 0009 PCI236
+ 000a PCI272
+ 000b PCI215
+14dd Boulder Design Labs Inc
+14de Applied Integration Corporation
+14df ASIC Communications Corp
+14e1 INVERTEX
+14e2 INFOLIBRIA
+14e3 AMTELCO
+14e4 Broadcom Corporation
+ 1644 NetXtreme BCM5700 Gigabit Ethernet
+ 1014 0277 Broadcom Vigil B5700 1000BaseTX
+ 1028 00d1 Broadcom BCM5700
+ 1028 0106 Broadcom BCM5700
+ 1028 0109 Broadcom BCM5700 1000BaseTX
+ 1028 010a Broadcom BCM5700 1000BaseTX
+ 10b7 1000 3C996-T 1000BaseTX
+ 10b7 1001 3C996B-T 1000BaseTX
+ 10b7 1002 3C996C-T 1000BaseTX
+ 10b7 1003 3C997-T 1000BaseTX Dual Port
+ 10b7 1004 3C996-SX 1000BaseSX
+ 10b7 1005 3C997-SX 1000BaseSX Dual Port
+ 10b7 1008 3C942 Gigabit LOM (31X31)
+ 14e4 0002 NetXtreme 1000BaseSX
+ 14e4 0003 NetXtreme 1000BaseSX
+ 14e4 0004 NetXtreme 1000BaseTX
+ 14e4 1028 NetXtreme 1000BaseTX
+ 14e4 1644 BCM5700 1000BaseTX
+ 1645 NetXtreme BCM5701 Gigabit Ethernet
+ 0e11 007c NC7770 Gigabit Server Adapter (PCI-X, 10/100/1000-T)
+ 0e11 007d NC6770 Gigabit Server Adapter (PCI-X, 1000-SX)
+ 0e11 0085 NC7780 Gigabit Server Adapter (embedded, WOL)
+ 0e11 0099 NC7780 Gigabit Server Adapter (embedded, WOL)
+ 0e11 009a NC7770 Gigabit Server Adapter (PCI-X, 10/100/1000-T)
+ 1028 0121 Broadcom BCM5701 1000BaseTX
+ 10b7 1004 3C996-SX 1000BaseSX
+ 10b7 1006 3C996B-T 1000BaseTX
+ 10b7 1007 3C1000-T 1000BaseTX
+ 10b7 1008 3C940-BR01 1000BaseTX
+ 14e4 0001 BCM5701 1000BaseTX
+ 14e4 0005 BCM5701 1000BaseTX
+ 14e4 0006 BCM5701 1000BaseTX
+ 14e4 0007 BCM5701 1000BaseSX
+ 14e4 0008 BCM5701 1000BaseTX
+ 14e4 8008 BCM5701 1000BaseTX
+ 1646 NetXtreme BCM5702 Gigabit Ethernet
+ 0e11 00bb NC7760 1000BaseTX
+ 1028 0126 Broadcom BCM5702 1000BaseTX
+ 14e4 8009 BCM5702 1000BaseTX
+ 1647 NetXtreme BCM5703 Gigabit Ethernet
+ 0e11 0099 NC7780 1000BaseTX
+ 0e11 009a NC7770 1000BaseTX
+ 14e4 0009 BCM5703 1000BaseTX
+ 14e4 000a BCM5703 1000BaseSX
+ 14e4 000b BCM5703 1000BaseTX
+ 14e4 8009 BCM5703 1000BaseTX
+ 14e4 800a BCM5703 1000BaseTX
+ 1648 NetXtreme BCM5704 Gigabit Ethernet
+ 164d NetXtreme BCM5702FE Gigabit Ethernet
+ 16a6 NetXtreme BCM5702X Gigabit Ethernet
+ 16a7 NetXtreme BCM5703X Gigabit Ethernet
+ 4212 BCM v.90 56k modem
+ 5820 BCM5820 Crypto Accelerator
+ 5821 BCM5821 Crypto Accelerator
+14e5 Pixelfusion Ltd
+14e6 SHINING Technology Inc
+14e7 3CX
+14e8 RAYCER Inc
+14e9 GARNETS System CO Ltd
+14ea Planex Communications, Inc
+ ab06 FNW-3603-TX CardBus Fast Ethernet
+14eb SEIKO EPSON Corp
+14ec ACQIRIS
+14ed DATAKINETICS Ltd
+14ee MASPRO KENKOH Corp
+14ef CARRY Computer ENG. CO Ltd
+14f0 CANON RESEARCH CENTRE FRANCE
+14f1 Conexant
+ 1002 HCF 56k Modem
+ 1003 HCF 56k Modem
+ 1004 HCF 56k Modem
+ 1005 HCF 56k Modem
+ 1006 HCF 56k Modem
+ 1022 HCF 56k Modem
+ 1023 HCF 56k Modem
+ 1024 HCF 56k Modem
+ 1025 HCF 56k Modem
+ 1026 HCF 56k Modem
+ 1032 HCF 56k Modem
+ 1033 HCF 56k Data/Fax Modem
+ 1033 8077 NEC
+ 122d 4027 Dell Zeus - MDP3880-W(B) Data Fax Modem
+ 122d 4030 Dell Mercury - MDP3880-U(B) Data Fax Modem
+ 122d 4034 Dell Thor - MDP3880-W(U) Data Fax Modem
+ 13e0 020d Dell Copper
+ 13e0 020e Dell Silver
+ 13e0 0261 IBM
+ 13e0 0290 Compaq Goldwing
+ 13e0 02a0 IBM
+ 13e0 02b0 IBM
+ 13e0 02c0 Compaq Scooter
+ 13e0 02d0 IBM
+ 144f 1500 IBM P85-DF (1)
+ 144f 1501 IBM P85-DF (2)
+ 144f 150a IBM P85-DF (3)
+ 144f 150b IBM P85-DF Low Profile (1)
+ 144f 1510 IBM P85-DF Low Profile (2)
+ 1034 HCF 56k Data/Fax/Voice Modem
+ 1035 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 10cf 1098 Fujitsu P85-DFSV
+ 1036 HCF 56k Data/Fax/Voice/Spkp Modem
+ 104d 8067 HCF 56k Modem
+ 122d 4029 MDP3880SP-W
+ 122d 4031 MDP3880SP-U
+ 13e0 0209 Dell Titanium
+ 13e0 020a Dell Graphite
+ 13e0 0260 Gateway Red Owl
+ 13e0 0270 Gateway White Horse
+ 1052 HCF 56k Data/Fax Modem (Worldwide)
+ 1053 HCF 56k Data/Fax Modem (Worldwide)
+ 1054 HCF 56k Data/Fax/Voice Modem (Worldwide)
+ 1055 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (Worldwide)
+ 1056 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide)
+ 1057 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide)
+ 1059 HCF 56k Data/Fax/Voice Modem (Worldwide)
+ 1063 HCF 56k Data/Fax Modem
+ 1064 HCF 56k Data/Fax/Voice Modem
+ 1065 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 1066 HCF 56k Data/Fax/Voice/Spkp Modem
+ 122d 4033 Dell Athena - MDP3900V-U
+ 1433 HCF 56k Data/Fax Modem
+ 1434 HCF 56k Data/Fax/Voice Modem
+ 1435 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 1436 HCF 56k Data/Fax Modem
+ 1453 HCF 56k Data/Fax Modem
+ 13e0 0240 IBM
+ 13e0 0250 IBM
+ 144f 1502 IBM P95-DF (1)
+ 144f 1503 IBM P95-DF (2)
+ 1454 HCF 56k Data/Fax/Voice Modem
+ 1455 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 1456 HCF 56k Data/Fax/Voice/Spkp Modem
+ 122d 4035 Dell Europa - MDP3900V-W
+ 122d 4302 Dell MP3930V-W(C) MiniPCI
+ 1610 ADSL AccessRunner PCI Arbitration Device
+ 1611 AccessRunner PCI ADSL Interface Device
+ 1803 HCF 56k Modem
+ 0e11 0023 623-LAN Grizzly
+ 0e11 0043 623-LAN Yogi
+ 1815 HCF 56k Modem
+ 0e11 0022 Grizzly
+ 0e11 0042 Yogi
+ 2003 HSF 56k Data/Fax Modem
+ 2004 HSF 56k Data/Fax/Voice Modem
+ 2005 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 2006 HSF 56k Data/Fax/Voice/Spkp Modem
+ 2013 HSF 56k Data/Fax Modem
+ 0e11 b195 Bear
+ 0e11 b196 Seminole 1
+ 0e11 b1be Seminole 2
+ 1025 8013 Acer
+ 1033 809d NEC
+ 1033 80bc NEC
+ 155d 6793 HP
+ 155d 8850 E Machines
+ 2014 HSF 56k Data/Fax/Voice Modem
+ 2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 2016 HSF 56k Data/Fax/Voice/Spkp Modem
+ 2043 HSF 56k Data/Fax Modem (WorldW SmartDAA)
+ 2044 HSF 56k Data/Fax/Voice Modem (WorldW SmartDAA)
+ 2045 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (WorldW SmartDAA)
+ 2046 HSF 56k Data/Fax/Voice/Spkp Modem (WorldW SmartDAA)
+ 2063 HSF 56k Data/Fax Modem (SmartDAA)
+ 2064 HSF 56k Data/Fax/Voice Modem (SmartDAA)
+ 2065 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (SmartDAA)
+ 2066 HSF 56k Data/Fax/Voice/Spkp Modem (SmartDAA)
+ 2093 HSF 56k Modem
+ 155d 2f07 Legend
+ 2143 HSF 56k Data/Fax/Cell Modem (Mob WorldW SmartDAA)
+ 2144 HSF 56k Data/Fax/Voice/Cell Modem (Mob WorldW SmartDAA)
+ 2145 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob WorldW SmartDAA)
+ 2146 HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob WorldW SmartDAA)
+ 2163 HSF 56k Data/Fax/Cell Modem (Mob SmartDAA)
+ 2164 HSF 56k Data/Fax/Voice/Cell Modem (Mob SmartDAA)
+ 2165 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob SmartDAA)
+ 2166 HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob SmartDAA)
+ 2343 HSF 56k Data/Fax CardBus Modem (Mob WorldW SmartDAA)
+ 2344 HSF 56k Data/Fax/Voice CardBus Modem (Mob WorldW SmartDAA)
+ 2345 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob WorldW SmartDAA)
+ 2346 HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob WorldW SmartDAA)
+ 2363 HSF 56k Data/Fax CardBus Modem (Mob SmartDAA)
+ 2364 HSF 56k Data/Fax/Voice CardBus Modem (Mob SmartDAA)
+ 2365 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob SmartDAA)
+ 2366 HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob SmartDAA)
+ 2443 HSF 56k Data/Fax Modem (Mob WorldW SmartDAA)
+ 104d 8075 Modem
+ 104d 8083 Modem
+ 104d 8097 Modem
+ 2444 HSF 56k Data/Fax/Voice Modem (Mob WorldW SmartDAA)
+ 2445 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob WorldW SmartDAA)
+ 2446 HSF 56k Data/Fax/Voice/Spkp Modem (Mob WorldW SmartDAA)
+ 2463 HSF 56k Data/Fax Modem (Mob SmartDAA)
+ 2464 HSF 56k Data/Fax/Voice Modem (Mob SmartDAA)
+ 2465 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob SmartDAA)
+ 2466 HSF 56k Data/Fax/Voice/Spkp Modem (Mob SmartDAA)
+ 2f00 HSF 56k HSFi Modem
+ 13e0 8d84 IBM HSFi V.90
+ 13e0 8d85 Compaq Stinger
+ 14f1 2004 Dynalink 56PMi
+ 8234 RS8234 ATM SAR Controller [ServiceSAR Plus]
+14f2 MOBILITY Electronics
+14f3 BROADLOGIC
+14f4 TOKYO Electronic Industry CO Ltd
+14f5 SOPAC Ltd
+14f6 COYOTE Technologies LLC
+14f7 WOLF Technology Inc
+14f8 AUDIOCODES Inc
+14f9 AG COMMUNICATIONS
+14fa WANDEL & GOLTERMANN
+14fb TRANSAS MARINE (UK) Ltd
+14fc QUADRICS Supercomputers World
+14fd JAPAN Computer Industry Inc
+14fe ARCHTEK TELECOM Corp
+14ff TWINHEAD INTERNATIONAL Corp
+1500 DELTA Electronics, Inc
+1501 BANKSOFT CANADA Ltd
+1502 MITSUBISHI ELECTRIC LOGISTICS SUPPORT Co Ltd
+1503 KAWASAKI LSI USA Inc
+1504 KAISER Electronics
+1505 ITA INGENIEURBURO FUR TESTAUFGABEN GmbH
+1506 CHAMELEON Systems Inc
+# Should be HTEC Ltd, but there are no known HTEC chips and 1507 is already used by mistake by Motorola (see vendor ID 1057).
+1507 Motorola ?? / HTEC
+ 0001 MPC105 [Eagle]
+ 0002 MPC106 [Grackle]
+ 0003 MPC8240 [Kahlua]
+ 0100 MC145575 [HFC-PCI]
+ 0431 KTI829c 100VG
+ 4801 Raven
+ 4802 Falcon
+ 4803 Hawk
+ 4806 CPX8216
+1508 HONDA CONNECTORS/MHOTRONICS Inc
+1509 FIRST INTERNATIONAL Computer Inc
+150a FORVUS RESEARCH Inc
+150b YAMASHITA Systems Corp
+150c KYOPAL CO Ltd
+150d WARPSPEED Inc
+150e C-PORT Corp
+150f INTEC GmbH
+1510 BEHAVIOR TECH Computer Corp
+1511 CENTILLIUM Technology Corp
+1512 ROSUN Technologies Inc
+1513 Raychem
+1514 TFL LAN Inc
+1515 Advent design
+1516 MYSON Technology Inc
+ 0803 SURECOM EP-320X-S 100/10M Ethernet PCI Adapter
+ 1320 10bd SURECOM EP-320X-S 100/10M Ethernet PCI Adapter
+1517 ECHOTEK Corp
+1518 PEP MODULAR Computers GmbH
+1519 TELEFON AKTIEBOLAGET LM Ericsson
+151a Globetek
+ 1002 PCI-1002
+ 1004 PCI-1004
+ 1008 PCI-1008
+151b COMBOX Ltd
+151c DIGITAL AUDIO LABS Inc
+151d Fujitsu Computer Products Of America
+151e MATRIX Corp
+151f TOPIC SEMICONDUCTOR Corp
+ 0000 TP560 Data/Fax/Voice 56k modem
+1520 CHAPLET System Inc
+1521 BELL Corp
+1522 MainPine Ltd
+ 0100 PCI <-> IOBus Bridge
+ 1522 0200 RockForceDUO 2 Port V.92/V.44 Data/Fax/Voice Modem
+ 1522 0300 RockForceQUATRO 4 Port V.92/V.44 Data/Fax/Voice Modem
+ 1522 0400 RockForceDUO+ 2 Port V.92/V.44 Data/Fax/Voice Modem
+ 1522 0500 RockForceQUATRO+ 4 Port V.92/V.44 Data/Fax/Voice Modem
+ 1522 0600 RockForce+ 2 Port V.90 Data/Fax/Voice Modem
+1523 MUSIC Semiconductors
+1524 ENE Technology Inc
+1525 IMPACT Technologies
+1526 ISS, Inc
+1527 SOLECTRON
+1528 ACKSYS
+1529 AMERICAN MICROSystems Inc
+152a QUICKTURN DESIGN Systems
+152b FLYTECH Technology CO Ltd
+152c MACRAIGOR Systems LLC
+152d QUANTA Computer Inc
+152e MELEC Inc
+152f PHILIPS - CRYPTO
+1530 ACQIS Technology Inc
+1531 CHYRON Corp
+1532 ECHELON Corp
+1533 BALTIMORE
+1534 ROAD Corp
+1535 EVERGREEN Technologies Inc
+1537 DATALEX COMMUNICATIONS
+1538 ARALION Inc
+1539 ATELIER INFORMATIQUES et ELECTRONIQUE ETUDES S.A.
+153a ONO SOKKI
+153b TERRATEC Electronic GmbH
+153c ANTAL Electronic
+153d FILANET Corp
+153e TECHWELL Inc
+153f MIPS DENMARK
+1540 PROVIDEO MULTIMEDIA Co Ltd
+1541 MACHONE Communications
+1542 VIVID Technology Inc
+1543 SILICON Laboratories
+1544 DCM DATA Systems
+1545 VISIONTEK
+1546 IOI Technology Corp
+1547 MITUTOYO Corp
+1548 JET PROPULSION Laboratory
+1549 INTERCONNECT Systems Solutions
+154a MAX Technologies Inc
+154b COMPUTEX Co Ltd
+154c VISUAL Technology Inc
+154d PAN INTERNATIONAL Industrial Corp
+154e SERVOTEST Ltd
+154f STRATABEAM Technology
+1550 OPEN NETWORK Co Ltd
+1551 SMART Electronic DEVELOPMENT GmbH
+1552 RACAL AIRTECH Ltd
+1553 CHICONY Electronics Co Ltd
+1554 PROLINK Microsystems Corp
+1555 GESYTEC GmbH
+1556 PLD APPLICATIONS
+1557 MEDIASTAR Co Ltd
+1558 CLEVO/KAPOK Computer
+1559 SI LOGIC Ltd
+155a INNOMEDIA Inc
+155b PROTAC INTERNATIONAL Corp
+155c Cemax-Icon Inc
+155d Mac System Co Ltd
+155e LP Elektronik GmbH
+155f Perle Systems Ltd
+1560 Terayon Communications Systems
+1561 Viewgraphics Inc
+1562 Symbol Technologies
+1563 A-Trend Technology Co Ltd
+1564 Yamakatsu Electronics Industry Co Ltd
+1565 Biostar Microtech Int'l Corp
+1566 Ardent Technologies Inc
+1567 Jungsoft
+1568 DDK Electronics Inc
+1569 Palit Microsystems Inc.
+156a Avtec Systems
+156b 2wire Inc
+156c Vidac Electronics GmbH
+156d Alpha-Top Corp
+156e Alfa Inc
+156f M-Systems Flash Disk Pioneers Ltd
+1570 Lecroy Corp
+1571 Contemporary Controls
+ a001 CCSI PCI20-485 ARCnet
+ a002 CCSI PCI20-485D ARCnet
+ a003 CCSI PCI20-485X ARCnet
+ a004 CCSI PCI20-CXB ARCnet
+ a005 CCSI PCI20-CXS ARCnet
+ a006 CCSI PCI20-FOG-SMA ARCnet
+ a007 CCSI PCI20-FOG-ST ARCnet
+ a008 CCSI PCI20-TB5 ARCnet
+ a009 CCSI PCI20-5-485 5Mbit ARCnet
+ a00a CCSI PCI20-5-485D 5Mbit ARCnet
+ a00b CCSI PCI20-5-485X 5Mbit ARCnet
+ a00c CCSI PCI20-5-FOG-ST 5Mbit ARCnet
+ a00d CCSI PCI20-5-FOG-SMA 5Mbit ARCnet
+ a201 CCSI PCI22-485 10Mbit ARCnet
+ a202 CCSI PCI22-485D 10Mbit ARCnet
+ a203 CCSI PCI22-485X 10Mbit ARCnet
+ a204 CCSI PCI22-CHB 10Mbit ARCnet
+ a205 CCSI PCI22-FOG_ST 10Mbit ARCnet
+ a206 CCSI PCI22-THB 10Mbit ARCnet
+1572 Otis Elevator Company
+1573 Lattice - Vantis
+1574 Fairchild Semiconductor
+1575 Voltaire Advanced Data Security Ltd
+1576 Viewcast COM
+1578 HITT
+1579 Dual Technology Corp
+157a Japan Electronics Ind Inc
+157b Star Multimedia Corp
+157c Eurosoft (UK)
+ 8001 Fix2000 PCI Y2K Compliance Card
+157d Gemflex Networks
+157e Transition Networks
+157f PX Instruments Technology Ltd
+1580 Primex Aerospace Co
+1581 SEH Computertechnik GmbH
+1582 Cytec Corp
+1583 Inet Technologies Inc
+1584 Uniwill Computer Corp
+1585 Logitron
+1586 Lancast Inc
+1587 Konica Corp
+1588 Solidum Systems Corp
+1589 Atlantek Microsystems Pty Ltd
+158a Digalog Systems Inc
+158b Allied Data Technologies
+158c Hitachi Semiconductor & Devices Sales Co Ltd
+158d Point Multimedia Systems
+158e Lara Technology Inc
+158f Ditect Coop
+1590 3pardata Inc
+1591 ARN
+1592 Syba Tech Ltd
+ 0781 Multi-IO Card
+ 0782 Parallel Port Card 2xEPP
+ 0783 Multi-IO Card
+ 0785 Multi-IO Card
+ 0786 Multi-IO Card
+ 0787 Multi-IO Card
+ 0788 Multi-IO Card
+ 078a Multi-IO Card
+1593 Bops Inc
+1594 Netgame Ltd
+1595 Diva Systems Corp
+1596 Folsom Research Inc
+1597 Memec Design Services
+1598 Granite Microsystems
+1599 Delta Electronics Inc
+159a General Instrument
+159b Faraday Technology Corp
+159c Stratus Computer Systems
+159d Ningbo Harrison Electronics Co Ltd
+159e A-Max Technology Co Ltd
+159f Galea Network Security
+15a0 Compumaster SRL
+15a1 Geocast Network Systems
+15a2 Catalyst Enterprises Inc
+ 0001 TA700 PCI Bus Analyzer/Exerciser
+15a3 Italtel
+15a4 X-Net OY
+15a5 Toyota Macs Inc
+15a6 Sunlight Ultrasound Technologies Ltd
+15a7 SSE Telecom Inc
+15a8 Shanghai Communications Technologies Center
+15aa Moreton Bay
+15ab Bluesteel Networks Inc
+15ac North Atlantic Instruments
+15ad VMWare Inc
+ 0710 Virtual SVGA
+15ae Amersham Pharmacia Biotech
+15b0 Zoltrix International Ltd
+15b1 Source Technology Inc
+15b2 Mosaid Technologies Inc
+15b3 Mellanox Technology
+ 5274 MT21108 InfiniBridge
+15b4 CCI/TRIAD
+15b5 Cimetrics Inc
+15b6 Texas Memory Systems Inc
+15b7 Sandisk Corp
+15b8 ADDI-DATA GmbH
+15b9 Maestro Digital Communications
+15ba Impacct Technology Corp
+15bb Portwell Inc
+15bc Agilent Technologies
+ 2929 E2929A PCI/PCI-X Bus Analyzer
+15bd DFI Inc
+15be Sola Electronics
+15bf High Tech Computer Corp (HTC)
+15c0 BVM Ltd
+15c1 Quantel
+15c2 Newer Technology Inc
+15c3 Taiwan Mycomp Co Ltd
+15c4 EVSX Inc
+15c5 Procomp Informatics Ltd
+15c6 Technical University of Budapest
+15c7 Tateyama System Laboratory Co Ltd
+ 0349 Tateyama C-PCI PLC/NC card Rev.01A
+15c8 Penta Media Co Ltd
+15c9 Serome Technology Inc
+15ca Bitboys OY
+15cb AG Electronics Ltd
+15cc Hotrail Inc
+15cd Dreamtech Co Ltd
+15ce Genrad Inc
+15cf Hilscher GmbH
+15d1 Infineon Technologies AG
+15d2 FIC (First International Computer Inc)
+15d3 NDS Technologies Israel Ltd
+15d4 Iwill Corp
+15d5 Tatung Co
+15d6 Entridia Corp
+15d7 Rockwell-Collins Inc
+15d8 Cybernetics Technology Co Ltd
+15d9 Super Micro Computer Inc
+15da Cyberfirm Inc
+15db Applied Computing Systems Inc
+15dc Litronic Inc
+ 0001 Argus 300 PCI Cryptography Module
+15dd Sigmatel Inc
+15de Malleable Technologies Inc
+15df Infinilink Corp
+15e0 Cacheflow Inc
+15e1 Voice Technologies Group Inc
+15e2 Quicknet Technologies Inc
+15e3 Networth Technologies Inc
+15e4 VSN Systemen BV
+15e5 Valley technologies Inc
+15e6 Agere Inc
+15e7 Get Engineering Corp
+15e8 National Datacomm Corp
+ 0130 Wireless PCI Card
+15e9 Pacific Digital Corp
+15ea Tokyo Denshi Sekei K.K.
+15eb Drsearch GmbH
+15ec Beckhoff GmbH
+15ed Macrolink Inc
+15ee In Win Development Inc
+15ef Intelligent Paradigm Inc
+15f0 B-Tree Systems Inc
+15f1 Times N Systems Inc
+15f2 Diagnostic Instruments Inc
+15f3 Digitmedia Corp
+15f4 Valuesoft
+15f5 Power Micro Research
+15f6 Extreme Packet Device Inc
+15f7 Banctec
+15f8 Koga Electronics Co
+15f9 Zenith Electronics Corp
+15fa J.P. Axzam Corp
+15fb Zilog Inc
+15fc Techsan Electronics Co Ltd
+15fd N-CUBED.NET
+15fe Kinpo Electronics Inc
+15ff Fastpoint Technologies Inc
+1600 Northrop Grumman - Canada Ltd
+1601 Tenta Technology
+1602 Prosys-tec Inc
+1603 Nokia Wireless Communications
+1604 Central System Research Co Ltd
+1605 Pairgain Technologies
+1606 Europop AG
+1607 Lava Semiconductor Manufacturing Inc
+1608 Automated Wagering International
+1609 Scimetric Instruments Inc
+1619 FarSite Communications Ltd
+ 0400 FarSync T2P (2 port X.21/V.35/V.24)
+ 0440 FarSync T4P (4 port X.21/V.35/V.24)
+1629 Kongsberg Spacetec AS
+ 1003 Format synchronizer v3.0
+ 2002 Fast Universal Data Output
+1638 Standard Microsystems Corp [SMC]
+ 1100 SMC2602W EZConnect / Addtron AWA-100
+1657 Brocade Communications Systems, Inc.
+165d Hsing Tech. Enterprise Co., Ltd.
+1661 Worldspace Corp.
+1668 Action Tec Electronics Inc
+16ec U.S. Robotics
+ 3685 Wireless Access PCI Adapter Model 022415
+16f6 VideoTele.com, Inc.
+170b NetOctave Inc
+170c YottaYotta Inc.
+173b Altima (nee Broadcom)
+ 03e8 AC1000 Gigabit Ethernet
+ 03ea AC9100 Gigabit Ethernet
+1743 Peppercon AG
+ 8139 ROL/F-100 Fast Ethernet Adapter with ROL
+174b PC Partner Limited
+175e Sanera Systems, Inc.
+# also used by Struck Innovative Systeme for joint developments
+1796 Research Centre Juelich
+ 0001 SIS1100 [Gigabit link]
+ 0002 HOTlink
+ 0003 Counter Timer
+ 0004 CAMAC Controller
+ 0005 PROFIBUS
+ 0006 AMCC HOTlink
+1813 Ambient Technologies Inc
+1a08 Sierra Semiconductor
+ 0000 SC15064
+1b13 Jaton Corp
+1c1c Symphony
+ 0001 82C101
+1d44 DPT
+ a400 PM2x24/PM3224
+1de1 Tekram Technology Co.,Ltd.
+ 0391 TRM-S1040
+ 2020 DC-390
+ 690c 690c
+ dc29 DC290
+2001 Temporal Research Ltd
+21c3 21st Century Computer Corp.
+2348 Racore
+ 2010 8142 100VG/AnyLAN
+2646 Kingston Technologies
+270b Xantel Corporation
+270f Chaintech Computer Co. Ltd
+2711 AVID Technology Inc.
+2a15 3D Vision(???)
+3000 Hansol Electronics Inc.
+3142 Post Impression Systems.
+3388 Hint Corp
+ 0021 HB1-SE33 PCI-PCI Bridge
+ 8011 VXPro II Chipset
+ 3388 8011 VXPro II Chipset CPU to PCI Bridge
+ 8012 VXPro II Chipset
+ 3388 8012 VXPro II Chipset PCI to ISA Bridge
+ 8013 VXPro II IDE
+ 3388 8013 VXPro II Chipset EIDE Controller
+3411 Quantum Designs (H.K.) Inc
+3513 ARCOM Control Systems Ltd
+38ef 4Links
+3d3d 3DLabs
+ 0001 GLINT 300SX
+ 0002 GLINT 500TX
+ 0003 GLINT Delta
+ 0004 Permedia
+ 0005 Permedia
+ 0006 GLINT MX
+ 0007 3D Extreme
+ 0008 GLINT Gamma G1
+ 0009 Permedia II 2D+3D
+ 1040 0011 AccelStar II
+ 3d3d 0100 AccelStar II 3D Accelerator
+ 3d3d 0111 Permedia 3:16
+ 3d3d 0114 Santa Ana
+ 3d3d 0116 Oxygen GVX1
+ 3d3d 0119 Scirocco
+ 3d3d 0120 Santa Ana PCL
+ 3d3d 0125 Oxygen VX1
+ 3d3d 0127 Permedia3 Create!
+ 000a GLINT R3
+ 3d3d 0121 Oxygen VX1
+ 0100 Permedia II 2D+3D
+ 1004 Permedia
+ 3d04 Permedia
+ ffff Glint VGA
+4005 Avance Logic Inc.
+ 0300 ALS300 PCI Audio Device
+ 0308 ALS300+ PCI Audio Device
+ 0309 PCI Input Controller
+ 1064 ALG-2064
+ 2064 ALG-2064i
+ 2128 ALG-2364A GUI Accelerator
+ 2301 ALG-2301
+ 2302 ALG-2302
+ 2303 AVG-2302 GUI Accelerator
+ 2364 ALG-2364A
+ 2464 ALG-2464
+ 2501 ALG-2564A/25128A
+ 4000 ALS4000 Audio Chipset
+ 4005 4000 ALS4000 Audio Chipset
+ 4710 ALC200/200P
+4033 Addtron Technology Co, Inc.
+ 1360 RTL8139 Ethernet
+4143 Digital Equipment Corp
+416c Aladdin Knowledge Systems
+ 0100 AladdinCARD
+ 0200 CPC
+4444 Internext Compression Inc
+4468 Bridgeport machines
+4594 Cogetec Informatique Inc
+45fb Baldor Electric Company
+4680 Umax Computer Corp
+4843 Hercules Computer Technology Inc
+4916 RedCreek Communications Inc
+ 1960 RedCreek PCI adapter
+4943 Growth Networks
+4978 Axil Computer Inc
+4a14 NetVin
+ 5000 NV5000SC
+ 4a14 5000 RT8029-Based Ethernet Adapter
+4b10 Buslogic Inc.
+4c48 LUNG HWA Electronics
+4c53 SBS Technologies
+4ca1 Seanix Technology Inc
+4d51 MediaQ Inc.
+ 0200 MQ-200
+4d54 Microtechnica Co Ltd
+4ddc ILC Data Device Corp
+ 0100 DD-42924I5-300 (ARINC 429 Data Bus)
+ 0801 BU-65570I1 MIL-STD-1553 Test and Simulation
+ 0802 BU-65570I2 MIL-STD-1553 Test and Simulation
+ 0811 BU-65572I1 MIL-STD-1553 Test and Simulation
+ 0812 BU-65572I2 MIL-STD-1553 Test and Simulation
+ 0881 BU-65570T1 MIL-STD-1553 Test and Simulation
+ 0882 BU-65570T2 MIL-STD-1553 Test and Simulation
+ 0891 BU-65572T1 MIL-STD-1553 Test and Simulation
+ 0892 BU-65572T2 MIL-STD-1553 Test and Simulation
+ 0901 BU-65565C1 MIL-STD-1553 Data Bus
+ 0902 BU-65565C2 MIL-STD-1553 Data Bus
+ 0903 BU-65565C3 MIL-STD-1553 Data Bus
+ 0904 BU-65565C4 MIL-STD-1553 Data Bus
+ 0b01 BU-65569I1 MIL-STD-1553 Data Bus
+ 0b02 BU-65569I2 MIL-STD-1553 Data Bus
+ 0b03 BU-65569I3 MIL-STD-1553 Data Bus
+ 0b04 BU-65569I4 MIL-STD-1553 Data Bus
+5046 GemTek Technology Corporation
+ 1001 PCI Radio
+5053 Voyetra Technologies
+ 2010 Daytona Audio Adapter
+5136 S S Technologies
+5143 Qualcomm Inc
+5145 Ensoniq (Old)
+ 3031 Concert AudioPCI
+5301 Alliance Semiconductor Corp.
+ 0001 ProMotion aT3D
+5333 S3 Inc.
+ 0551 Plato/PX (system)
+ 5631 86c325 [ViRGE]
+ 8800 86c866 [Vision 866]
+ 8801 86c964 [Vision 964]
+ 8810 86c764_0 [Trio 32 vers 0]
+ 8811 86c764/765 [Trio32/64/64V+]
+ 8812 86cM65 [Aurora64V+]
+ 8813 86c764_3 [Trio 32/64 vers 3]
+ 8814 86c767 [Trio 64UV+]
+ 8815 86cM65 [Aurora 128]
+ 883d 86c988 [ViRGE/VX]
+ 8870 FireGL
+ 8880 86c868 [Vision 868 VRAM] vers 0
+ 8881 86c868 [Vision 868 VRAM] vers 1
+ 8882 86c868 [Vision 868 VRAM] vers 2
+ 8883 86c868 [Vision 868 VRAM] vers 3
+ 88b0 86c928 [Vision 928 VRAM] vers 0
+ 88b1 86c928 [Vision 928 VRAM] vers 1
+ 88b2 86c928 [Vision 928 VRAM] vers 2
+ 88b3 86c928 [Vision 928 VRAM] vers 3
+ 88c0 86c864 [Vision 864 DRAM] vers 0
+ 88c1 86c864 [Vision 864 DRAM] vers 1
+ 88c2 86c864 [Vision 864-P DRAM] vers 2
+ 88c3 86c864 [Vision 864-P DRAM] vers 3
+ 88d0 86c964 [Vision 964 VRAM] vers 0
+ 88d1 86c964 [Vision 964 VRAM] vers 1
+ 88d2 86c964 [Vision 964-P VRAM] vers 2
+ 88d3 86c964 [Vision 964-P VRAM] vers 3
+ 88f0 86c968 [Vision 968 VRAM] rev 0
+ 88f1 86c968 [Vision 968 VRAM] rev 1
+ 88f2 86c968 [Vision 968 VRAM] rev 2
+ 88f3 86c968 [Vision 968 VRAM] rev 3
+ 8900 86c755 [Trio 64V2/DX]
+ 5333 8900 86C775 Trio64V2/DX
+ 8901 86c775/86c785 [Trio 64V2/DX or /GX]
+ 5333 8901 86C775 Trio64V2/DX, 86C785 Trio64V2/GX
+ 8902 Plato/PX
+ 8903 Trio 3D business multimedia
+ 8904 Trio 64 3D
+ 1014 00db Integrated Trio3D
+ 5333 8904 86C365 Trio3D AGP
+ 8905 Trio 64V+ family
+ 8906 Trio 64V+ family
+ 8907 Trio 64V+ family
+ 8908 Trio 64V+ family
+ 8909 Trio 64V+ family
+ 890a Trio 64V+ family
+ 890b Trio 64V+ family
+ 890c Trio 64V+ family
+ 890d Trio 64V+ family
+ 890e Trio 64V+ family
+ 890f Trio 64V+ family
+ 8a01 ViRGE/DX or /GX
+ 0e11 b032 ViRGE/GX
+ 10b4 1617 Nitro 3D
+ 10b4 1717 Nitro 3D
+ 5333 8a01 ViRGE/DX
+ 8a10 ViRGE/GX2
+ 1092 8a10 Stealth 3D 4000
+ 8a13 86c368 [Trio 3D/2X]
+ 5333 8a13 Trio3D/2X
+ 8a20 86c794 [Savage 3D]
+ 5333 8a20 86C391 Savage3D
+ 8a21 86c390 [Savage 3D/MV]
+ 5333 8a21 86C390 Savage3D/MV
+ 8a22 Savage 4
+ 1033 8068 Savage 4
+ 1033 8069 Savage 4
+ 105d 0018 SR9 8Mb SDRAM
+ 105d 002a SR9 Pro 16Mb SDRAM
+ 105d 003a SR9 Pro 32Mb SDRAM
+ 105d 092f SR9 Pro+ 16Mb SGRAM
+ 1092 4207 Stealth III S540
+ 1092 4800 Stealth III S540
+ 1092 4807 SpeedStar A90
+ 1092 4808 Stealth III S540
+ 1092 4809 Stealth III S540
+ 1092 480e Stealth III S540
+ 1092 4904 Stealth III S520
+ 1092 4905 SpeedStar A200
+ 1092 4a09 Stealth III S540
+ 1092 4a0b Stealth III S540 Xtreme
+ 1092 4a0f Stealth III S540
+ 1092 4e01 Stealth III S540
+ 1102 101d 3d Blaster Savage 4
+ 1102 101e 3d Blaster Savage 4
+ 5333 8100 86C394-397 Savage4 SDRAM 100
+ 5333 8110 86C394-397 Savage4 SDRAM 110
+ 5333 8125 86C394-397 Savage4 SDRAM 125
+ 5333 8143 86C394-397 Savage4 SDRAM 143
+ 5333 8a22 86C394-397 Savage4
+ 5333 8a2e 86C394-397 Savage4 32bit
+ 5333 9125 86C394-397 Savage4 SGRAM 125
+ 5333 9143 86C394-397 Savage4 SGRAM 143
+ 8a23 Savage 4
+ 8a25 ProSavage PM133
+ 8a26 ProSavage KM133
+ 8c00 ViRGE/M3
+ 8c01 ViRGE/MX
+ 1179 0001 ViRGE/MX
+ 8c02 ViRGE/MX+
+ 8c03 ViRGE/MX+MV
+ 8c10 86C270-294 Savage/MX-MV
+ 8c11 82C270-294 Savage/MX
+ 8c12 86C270-294 Savage/IX-MV
+ 8c13 86C270-294 Savage/IX
+ 8c22 SuperSavage MX/128
+ 8c24 SuperSavage MX/64
+ 8c26 SuperSavage MX/64C
+ 8c2a SuperSavage IX/128 SDR
+ 8c2b SuperSavage IX/128 DDR
+ 8c2c SuperSavage IX/64 SDR
+ 8c2d SuperSavage IX/64 DDR
+ 8c2e SuperSavage IX/C SDR
+ 1014 01fc ThinkPad T23 (2647-4MG)
+ 8c2f SuperSavage IX/C DDR
+# Integrated in VIA ProSavage PN133 North Bridge
+ 8d01 VT8603 [ProSavage PN133] AGP4X VGA Controller (Twister)
+ 8d02 VT8636A [ProSavage KN133] AGP4X VGA Controller (TwisterK)
+ 8d04 VT8751 [ProSavageDDR P4M266] VGA Controller
+ 9102 86C410 Savage 2000
+ 1092 5932 Viper II Z200
+ 1092 5934 Viper II Z200
+ 1092 5952 Viper II Z200
+ 1092 5954 Viper II Z200
+ 1092 5a35 Viper II Z200
+ 1092 5a37 Viper II Z200
+ 1092 5a55 Viper II Z200
+ 1092 5a57 Viper II Z200
+ ca00 SonicVibes
+544c Teralogic Inc
+5455 Technische Universitaet Berlin
+ 4458 S5933
+5519 Cnet Technologies, Inc.
+5544 Dunord Technologies
+ 0001 I-30xx Scanner Interface
+5555 Genroco, Inc
+ 0003 TURBOstor HFP-832 [HiPPI NIC]
+5700 Netpower
+6356 UltraStor
+6374 c't Magazin für Computertechnik
+ 6773 GPPCI
+6409 Logitec Corp.
+6666 Decision Computer International Co.
+ 0001 PCCOM4
+ 0002 PCCOM8
+7604 O.N. Electronic Co Ltd.
+7bde MIDAC Corporation
+7fed PowerTV
+8008 Quancom Electronic GmbH
+ 0010 WDOG1 [PCI-Watchdog 1]
+ 0011 PWDOG2 [PCI-Watchdog 2]
+8086 Intel Corp.
+ 0007 82379AB
+ 0008 Extended Express System Support Controller
+ 0039 21145
+ 0122 82437FX
+ 0482 82375EB
+ 0483 82424ZX [Saturn]
+ 0484 82378IB [SIO ISA Bridge]
+ 0486 82430ZX [Aries]
+ 04a3 82434LX [Mercury/Neptune]
+ 04d0 82437FX [Triton FX]
+ 0600 RAID Controller
+ 0960 80960RP [i960 RP Microprocessor/Bridge]
+ 0962 80960RM [i960RM Bridge]
+ 0964 80960RP [i960 RP Microprocessor/Bridge]
+ 1000 82542 Gigabit Ethernet Controller
+ 0e11 b0df NC1632 Gigabit Ethernet Adapter (1000-SX)
+ 0e11 b0e0 NC1633 Gigabit Ethernet Adapter (1000-LX)
+ 0e11 b123 NC1634 Gigabit Ethernet Adapter (1000-SX)
+ 1014 0119 Netfinity Gigabit Ethernet SX Adapter
+ 8086 1000 PRO/1000 Gigabit Server Adapter
+ 1001 82543GC Gigabit Ethernet Controller
+ 0e11 004a NC6136 Gigabit Server Adapter
+ 1014 01ea Netfinity Gigabit Ethernet SX Adapter
+ 8086 1003 PRO/1000 F Server Adapter
+ 1002 Pro 100 LAN+Modem 56 Cardbus II
+ 8086 200e Pro 100 LAN+Modem 56 Cardbus II
+ 8086 2013 Pro 100 SR Mobile Combo Adapter
+ 8086 2017 Pro 100 S Combo Mobile Adapter
+ 1004 82543GC Gigabit Ethernet Controller
+ 0e11 0049 NC7132 Gigabit Upgrade Module
+ 0e11 b1a4 NC7131 Gigabit Server Adapter
+ 1014 10f2 Gigabit Ethernet Server Adapter
+ 8086 1004 PRO/1000 T Server Adapter
+ 8086 2004 PRO/1000 T Server Adapter
+ 1008 82544EI Gigabit Ethernet Controller
+ 8086 1107 PRO/1000 XT Server Adapter
+ 8086 2107 PRO/1000 XT Server Adapter
+ 8086 2110 PRO/1000 XT Server Adapter
+ 1009 82544EI Gigabit Ethernet Controller
+ 8086 1109 PRO/1000 XF Server Adapter
+ 8086 2109 PRO/1000 XF Server Adapter
+ 100c 82544GC Gigabit Ethernet Controller
+ 8086 1112 PRO/1000 T Desktop Adapter
+ 8086 2112 PRO/1000 T Desktop Adapter
+ 100d 82544GC Gigabit Ethernet Controller
+ 100e 82540EM Gigabit Ethernet Controller
+ 8086 001e PRO/1000 MT Desktop Adapter
+ 8086 002e PRO/1000 MT Desktop Adapter
+ 100f 82545EM Gigabit Ethernet Controller
+ 8086 1001 PRO/1000 MT Server Adapter
+ 1010 82546EB Gigabit Ethernet Controller
+ 8086 1011 PRO/1000 MT Dual Port Server Adapter
+ 1011 82545EM Gigabit Ethernet Controller
+ 8086 1002 PRO/1000 MF Server Adapter
+ 1012 82546EB Gigabit Ethernet Controller
+ 8086 1012 PRO/1000 MF Dual Port Server Adapter
+ 1029 82559 Ethernet Controller
+ 1030 82559 InBusiness 10/100
+ 1031 82801CAM (ICH3) PRO/100 VE (LOM) Ethernet Controller
+ 1014 0209 ThinkPad A30p (2653-64G)
+ 104d 80e7 Vaio PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 107b 5350 EtherExpress PRO/100 VE
+ 1179 0001 EtherExpress PRO/100 VE
+ 144d c000 EtherExpress PRO/100 VE
+ 144d c001 EtherExpress PRO/100 VE
+ 144d c003 EtherExpress PRO/100 VE
+ 1032 82801CAM (ICH3) PRO/100 VE Ethernet Controller
+ 1033 82801CAM (ICH3) PRO/100 VM (LOM) Ethernet Controller
+ 1034 82801CAM (ICH3) PRO/100 VM Ethernet Controller
+ 1035 82801CAM (ICH3)/82562EH (LOM) Ethernet Controller
+ 1036 82801CAM (ICH3) 82562EH Ethernet Controller
+ 1037 82801CAM (ICH3) Chipset Ethernet Controller
+ 1038 82801CAM (ICH3) PRO/100 VM (KM) Ethernet Controller
+ 1039 82801BD PRO/100 VE (LOM) Ethernet Controller
+ 103a 82801BD PRO/100 VE (CNR) Ethernet Controller
+ 103b 82801BD PRO/100 VM (LOM) Ethernet Controller
+ 103c 82801BD PRO/100 VM (CNR) Ethernet Controller
+ 103d 82801BD PRO/100 VE (MOB) Ethernet Controller
+ 103e 82801BD PRO/100 VM (MOB) Ethernet Controller
+ 1059 82551QM Ethernet Controller
+ 1130 82815 815 Chipset Host Bridge and Memory Controller Hub
+ 1043 8027 TUSL2-C Mainboard
+ 104d 80df Vaio PCG-FX403
+ 1131 82815 815 Chipset AGP Bridge
+ 1132 82815 CGC [Chipset Graphics Controller]
+ 1025 1016 Travelmate 612 TX
+ 104d 80df Vaio PCG-FX403
+ 1161 82806AA PCI64 Hub Advanced Programmable Interrupt Controller
+ 8086 1161 82806AA PCI64 Hub APIC
+ 1200 Intel IXP1200 Network Processor
+ 172a 0000 AEP SSL Accelerator
+ 1209 82559ER
+ 1221 82092AA_0
+ 1222 82092AA_1
+ 1223 SAA7116
+ 1225 82452KX/GX [Orion]
+ 1226 82596 PRO/10 PCI
+ 1227 82865 EtherExpress PRO/100A
+ 1228 82556 EtherExpress PRO/100 Smart
+# the revision field differentiates between them (1-3 is 82557, 4-5 is 82558, 6-8 is 82559, 9 is 82559ER)
+ 1229 82557/8/9 [Ethernet Pro 100]
+ 0e11 3001 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3002 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3003 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3004 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3005 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3006 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3007 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 b01e NC3120 Fast Ethernet NIC
+ 0e11 b01f NC3122 Fast Ethernet NIC (dual port)
+ 0e11 b02f NC1120 Ethernet NIC
+ 0e11 b04a Netelligent 10/100TX NIC with Wake on LAN
+ 0e11 b0c6 NC3161 Fast Ethernet NIC (embedded, WOL)
+ 0e11 b0c7 NC3160 Fast Ethernet NIC (embedded)
+ 0e11 b0d7 NC3121 Fast Ethernet NIC (WOL)
+ 0e11 b0dd NC3131 Fast Ethernet NIC (dual port)
+ 0e11 b0de NC3132 Fast Ethernet Module (dual port)
+ 0e11 b0e1 NC3133 Fast Ethernet Module (100-FX)
+ 0e11 b134 NC3163 Fast Ethernet NIC (embedded, WOL)
+ 0e11 b13c NC3162 Fast Ethernet NIC (embedded)
+ 0e11 b144 NC3123 Fast Ethernet NIC (WOL)
+ 0e11 b163 NC3134 Fast Ethernet NIC (dual port)
+ 0e11 b164 NC3135 Fast Ethernet Upgrade Module (dual port)
+ 0e11 b1a4 NC7131 Gigabit Server Adapter
+ 1014 005c 82558B Ethernet Pro 10/100
+ 1014 01bc 82559 Fast Ethernet LAN On Motherboard
+ 1014 01f1 10/100 Ethernet Server Adapter
+ 1014 01f2 10/100 Ethernet Server Adapter
+ 1014 0207 Ethernet Pro/100 S
+ 1014 0232 10/100 Dual Port Server Adapter
+ 1014 105c Netfinity 10/100
+ 1014 305c 10/100 EtherJet Management Adapter
+ 1014 405c 10/100 EtherJet Adapter with Alert on LAN
+ 1014 505c 10/100 EtherJet Secure Management Adapter
+ 1014 605c 10/100 EtherJet Secure Management Adapter
+ 1014 705c 10/100 Netfinity 10/100 Ethernet Security Adapter
+ 1014 805c 10/100 Netfinity 10/100 Ethernet Security Adapter
+ 1033 8000 PC-9821X-B06
+ 1033 8016 PK-UG-X006
+ 1033 801f PK-UG-X006
+ 1033 8026 PK-UG-X006
+ 1033 8063 82559-based Fast Ethernet Adapter
+ 1033 8064 82559-based Fast Ethernet Adapter
+ 103c 10c0 NetServer 10/100TX
+ 103c 10c3 NetServer 10/100TX
+ 103c 10ca NetServer 10/100TX
+ 103c 10cb NetServer 10/100TX
+ 103c 10e3 NetServer 10/100TX
+ 103c 10e4 NetServer 10/100TX
+ 103c 1200 NetServer 10/100TX
+ 10c3 1100 SmartEther100 SC1100
+ 10cf 1115 8255x-based Ethernet Adapter (10/100)
+ 10cf 1143 8255x-based Ethernet Adapter (10/100)
+ 1179 0001 8255x-based Ethernet Adapter (10/100)
+ 1179 0002 PCI FastEther LAN on Docker
+ 1179 0003 8255x-based Fast Ethernet
+ 1259 2560 AT-2560 100
+ 1259 2561 AT-2560 100 FX Ethernet Adapter
+ 1266 0001 NE10/100 Adapter
+ 144d 2501 SEM-2000 MiniPCI LAN Adapter
+ 144d 2502 SEM-2100IL MiniPCI LAN Adapter
+ 1668 1100 EtherExpress PRO/100B (TX) (MiniPCI Ethernet+Modem)
+ 8086 0001 EtherExpress PRO/100B (TX)
+ 8086 0002 EtherExpress PRO/100B (T4)
+ 8086 0003 EtherExpress PRO/10+
+ 8086 0004 EtherExpress PRO/100 WfM
+ 8086 0005 82557 10/100
+ 8086 0006 82557 10/100 with Wake on LAN
+ 8086 0007 82558 10/100 Adapter
+ 8086 0008 82558 10/100 with Wake on LAN
+ 8086 0009 EtherExpress PRO/100+
+ 8086 000a EtherExpress PRO/100+ Management Adapter
+ 8086 000b EtherExpress PRO/100+
+ 8086 000c EtherExpress PRO/100+ Management Adapter
+ 8086 000d EtherExpress PRO/100+ Alert On LAN II* Adapter
+ 8086 000e EtherExpress PRO/100+ Management Adapter with Alert On LAN*
+ 8086 000f EtherExpress PRO/100 Desktop Adapter
+ 8086 0010 EtherExpress PRO/100 S Management Adapter
+ 8086 0011 EtherExpress PRO/100 S Management Adapter
+ 8086 0012 EtherExpress PRO/100 S Advanced Management Adapter (D)
+ 8086 0013 EtherExpress PRO/100 S Advanced Management Adapter (E)
+ 8086 0030 EtherExpress PRO/100 Management Adapter with Alert On LAN* GC
+ 8086 0031 EtherExpress PRO/100 Desktop Adapter
+ 8086 0040 EtherExpress PRO/100 S Desktop Adapter
+ 8086 0041 EtherExpress PRO/100 S Desktop Adapter
+ 8086 0042 EtherExpress PRO/100 Desktop Adapter
+ 8086 0050 EtherExpress PRO/100 S Desktop Adapter
+ 8086 1009 EtherExpress PRO/100+ Server Adapter
+ 8086 100c EtherExpress PRO/100+ Server Adapter (PILA8470B)
+ 8086 1012 EtherExpress PRO/100 S Server Adapter (D)
+ 8086 1013 EtherExpress PRO/100 S Server Adapter (E)
+ 8086 1015 EtherExpress PRO/100 S Dual Port Server Adapter
+ 8086 1017 EtherExpress PRO/100+ Dual Port Server Adapter
+ 8086 1030 EtherExpress PRO/100+ Management Adapter with Alert On LAN* G Server
+ 8086 1040 EtherExpress PRO/100 S Server Adapter
+ 8086 1041 EtherExpress PRO/100 S Server Adapter
+ 8086 1042 EtherExpress PRO/100 Server Adapter
+ 8086 1050 EtherExpress PRO/100 S Server Adapter
+ 8086 1051 EtherExpress PRO/100 Server Adapter
+ 8086 1052 EtherExpress PRO/100 Server Adapter
+ 8086 10f0 EtherExpress PRO/100+ Dual Port Adapter
+ 8086 2009 EtherExpress PRO/100 S Mobile Adapter
+ 8086 200d EtherExpress PRO/100 Cardbus
+ 8086 200e EtherExpress PRO/100 LAN+V90 Cardbus Modem
+ 8086 200f EtherExpress PRO/100 SR Mobile Adapter
+ 8086 2010 EtherExpress PRO/100 S Mobile Combo Adapter
+ 8086 2013 EtherExpress PRO/100 SR Mobile Combo Adapter
+ 8086 2016 EtherExpress PRO/100 S Mobile Adapter
+ 8086 2017 EtherExpress PRO/100 S Combo Mobile Adapter
+ 8086 2018 EtherExpress PRO/100 SR Mobile Adapter
+ 8086 2019 EtherExpress PRO/100 SR Combo Mobile Adapter
+ 8086 2101 EtherExpress PRO/100 P Mobile Adapter
+ 8086 2102 EtherExpress PRO/100 SP Mobile Adapter
+ 8086 2103 EtherExpress PRO/100 SP Mobile Adapter
+ 8086 2104 EtherExpress PRO/100 SP Mobile Adapter
+ 8086 2105 EtherExpress PRO/100 SP Mobile Adapter
+ 8086 2106 EtherExpress PRO/100 P Mobile Adapter
+ 8086 2107 EtherExpress PRO/100 Network Connection
+ 8086 2108 EtherExpress PRO/100 Network Connection
+ 8086 2200 EtherExpress PRO/100 P Mobile Combo Adapter
+ 8086 2201 EtherExpress PRO/100 P Mobile Combo Adapter
+ 8086 2202 EtherExpress PRO/100 SP Mobile Combo Adapter
+ 8086 2203 EtherExpress PRO/100+ MiniPCI
+ 8086 2204 EtherExpress PRO/100+ MiniPCI
+ 8086 2205 EtherExpress PRO/100 SP Mobile Combo Adapter
+ 8086 2206 EtherExpress PRO/100 SP Mobile Combo Adapter
+ 8086 2207 EtherExpress PRO/100 SP Mobile Combo Adapter
+ 8086 2208 EtherExpress PRO/100 P Mobile Combo Adapter
+ 8086 2402 EtherExpress PRO/100+ MiniPCI
+ 8086 2407 EtherExpress PRO/100+ MiniPCI
+ 8086 2408 EtherExpress PRO/100+ MiniPCI
+ 8086 2409 EtherExpress PRO/100+ MiniPCI
+ 8086 240f EtherExpress PRO/100+ MiniPCI
+ 8086 2410 EtherExpress PRO/100+ MiniPCI
+ 8086 2411 EtherExpress PRO/100+ MiniPCI
+ 8086 2412 EtherExpress PRO/100+ MiniPCI
+ 8086 2413 EtherExpress PRO/100+ MiniPCI
+ 8086 3000 82559 Fast Ethernet LAN on Motherboard
+ 8086 3001 82559 Fast Ethernet LOM with Basic Alert on LAN*
+ 8086 3002 82559 Fast Ethernet LOM with Alert on LAN II*
+ 8086 3006 EtherExpress PRO/100 S Network Connection
+ 8086 3007 EtherExpress PRO/100 S Network Connection
+ 8086 3008 EtherExpress PRO/100 Network Connection
+ 8086 3010 EtherExpress PRO/100 S Network Connection
+ 8086 3011 EtherExpress PRO/100 S Network Connection
+ 8086 3012 EtherExpress PRO/100 Network Connection
+ 122d 430FX - 82437FX TSC [Triton I]
+ 122e 82371FB PIIX ISA [Triton I]
+ 1230 82371FB PIIX IDE [Triton I]
+ 1231 DSVD Modem
+ 1234 430MX - 82371MX Mobile PCI I/O IDE Xcelerator (MPIIX)
+ 1235 430MX - 82437MX Mob. System Ctrlr (MTSC) & 82438MX Data Path (MTDP)
+ 1237 440FX - 82441FX PMC [Natoma]
+ 1239 82371FB
+ 123b 82380PB
+ 123c 82380AB
+ 123d 683053 Programmable Interrupt Device
+ 123f 82466GX Integrated Hot-Plug Controller (IHPC)
+ 1240 752 AGP
+ 124b 82380FB
+ 1250 430HX - 82439HX TXC [Triton II]
+ 1360 82806AA PCI64 Hub PCI Bridge
+ 1361 82806AA PCI64 Hub Controller (HRes)
+ 8086 1361 82806AA PCI64 Hub Controller (HRes)
+ 8086 8000 82806AA PCI64 Hub Controller (HRes)
+ 1460 82870P2 P64H2 Hub PCI Bridge
+ 1461 82870P2 P64H2 I/OxAPIC
+ 1462 82870P2 P64H2 Hot Plug Controller
+ 1960 80960RP [i960RP Microprocessor]
+ 101e 0431 MegaRAID 431 RAID Controller
+ 101e 0438 MegaRAID 438 Ultra2 LVD RAID Controller
+ 101e 0466 MegaRAID 466 Express Plus RAID Controller
+ 101e 0467 MegaRAID 467 Enterprise 1500 RAID Controller
+ 101e 0490 MegaRAID 490 Express 300 RAID Controller
+ 101e 0762 MegaRAID 762 Express RAID Controller
+ 101e 09a0 PowerEdge Expandable RAID Controller 2/SC
+ 1028 0467 PowerEdge Expandable RAID Controller 2/DC
+ 1028 1111 PowerEdge Expandable RAID Controller 2/SC
+ 103c 03a2 MegaRAID
+ 103c 10c6 MegaRAID 438, HP NetRAID-3Si
+ 103c 10c7 MegaRAID T5, Integrated HP NetRAID
+ 103c 10cc MegaRAID, Integrated HP NetRAID
+ 103c 10cd HP NetRAID-1Si
+ 105a 0000 SuperTrak
+ 105a 2168 SuperTrak Pro
+ 105a 5168 SuperTrak66/100
+ 1111 1111 MegaRAID 466, PowerEdge Expandable RAID Controller 2/SC
+ 1111 1112 PowerEdge Expandable RAID Controller 2/SC
+ 113c 03a2 MegaRAID
+ 1962 80960RM [i960RM Microprocessor]
+ 105a 0000 SuperTrak SX6000 I2O CPU
+ 1a21 82840 840 (Carmel) Chipset Host Bridge (Hub A)
+ 1a23 82840 840 (Carmel) Chipset AGP Bridge
+ 1a24 82840 840 (Carmel) Chipset PCI Bridge (Hub B)
+ 1a30 82845 845 (Brookdale) Chipset Host Bridge
+ 1a31 82845 845 (Brookdale) Chipset AGP Bridge
+ 2410 82801AA ISA Bridge (LPC)
+ 2411 82801AA IDE
+ 2412 82801AA USB
+ 2413 82801AA SMBus
+ 2415 82801AA AC'97 Audio
+ 1028 0095 Precision Workstation 220 Integrated Digital Audio
+ 11d4 0040 SoundMAX Integrated Digital Audio
+ 11d4 0048 SoundMAX Integrated Digital Audio
+ 11d4 5340 SoundMAX Integrated Digital Audio
+ 2416 82801AA AC'97 Modem
+ 2418 82801AA PCI Bridge
+ 2420 82801AB ISA Bridge (LPC)
+ 2421 82801AB IDE
+ 2422 82801AB USB
+ 2423 82801AB SMBus
+ 2425 82801AB AC'97 Audio
+ 11d4 0040 SoundMAX Integrated Digital Audio
+ 11d4 0048 SoundMAX Integrated Digital Audio
+ 2426 82801AB AC'97 Modem
+ 2428 82801AB PCI Bridge
+ 2440 82801BA ISA Bridge (LPC)
+ 2442 82801BA/BAM USB (Hub #1)
+ 104d 80df Vaio PCG-FX403
+ 147b 0507 TH7II-RAID
+ 2443 82801BA/BAM SMBus
+ 1043 8027 TUSL2-C Mainboard
+ 104d 80df Vaio PCG-FX403
+ 147b 0507 TH7II-RAID
+ 2444 82801BA/BAM USB (Hub #2)
+ 104d 80df Vaio PCG-FX403
+ 147b 0507 TH7II-RAID
+ 2445 82801BA/BAM AC'97 Audio
+ 104d 80df Vaio PCG-FX403
+ 1462 3370 STAC9721 AC
+ 147b 0507 TH7II-RAID
+ 2446 82801BA/BAM AC'97 Modem
+ 104d 80df Vaio PCG-FX403
+ 2448 82801BAM/CAM PCI Bridge
+ 2449 82801BA/BAM/CA/CAM Ethernet Controller
+ 0e11 0012 EtherExpress PRO/100 VM
+ 0e11 0091 EtherExpress PRO/100 VE
+ 1014 01ce EtherExpress PRO/100 VE
+ 1014 01dc EtherExpress PRO/100 VE
+ 1014 01eb EtherExpress PRO/100 VE
+ 1014 01ec EtherExpress PRO/100 VE
+ 1014 0202 EtherExpress PRO/100 VE
+ 1014 0205 EtherExpress PRO/100 VE
+ 1014 0217 EtherExpress PRO/100 VE
+ 1014 0234 EtherExpress PRO/100 VE
+ 1014 023d EtherExpress PRO/100 VE
+ 1014 0244 EtherExpress PRO/100 VE
+ 1014 0245 EtherExpress PRO/100 VE
+ 109f 315d EtherExpress PRO/100 VE
+ 109f 3181 EtherExpress PRO/100 VE
+ 1186 7801 EtherExpress PRO/100 VE
+ 144d 2602 HomePNA 1M CNR
+ 8086 3010 EtherExpress PRO/100 VE
+ 8086 3011 EtherExpress PRO/100 VM
+ 8086 3012 82562EH based Phoneline
+ 8086 3013 EtherExpress PRO/100 VE
+ 8086 3014 EtherExpress PRO/100 VM
+ 8086 3015 82562EH based Phoneline
+ 8086 3016 EtherExpress PRO/100 P Mobile Combo
+ 8086 3017 EtherExpress PRO/100 P Mobile
+ 8086 3018 EtherExpress PRO/100
+ 244a 82801BAM IDE U100
+ 1025 1016 Travelmate 612TX
+ 104d 80df Vaio PCG-FX403
+ 244b 82801BA IDE U100
+ 1043 8027 TUSL2-C Mainboard
+ 147b 0507 TH7II-RAID
+ 244c 82801BAM ISA Bridge (LPC)
+ 244e 82801BA/CA/DB PCI Bridge
+ 2450 82801E ISA Bridge (LPC)
+ 2452 82801E USB
+ 2453 82801E SMBus
+ 2459 82801E Ethernet Controller 0
+ 245b 82801E IDE U100
+ 245d 82801E Ethernet Controller 1
+ 245e 82801E PCI Bridge
+ 2480 82801CA ISA Bridge (LPC)
+ 2482 82801CA/CAM USB (Hub #1)
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 2483 82801CA/CAM SMBus
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 2484 82801CA/CAM USB (Hub #2)
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 2485 82801CA/CAM AC'97 Audio
+ 1014 0222 ThinkPad T23 (2647-4MG)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 2486 82801CA/CAM AC'97 Modem
+ 1014 0223 ThinkPad A30p (2653-64G)
+ 1014 0503 ThinkPad R31 2656BBG
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 134d 4c21 Dell Inspiron 2100 internal modem
+ 2487 82801CA/CAM USB (Hub #3)
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 248a 82801CAM IDE U100
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 248b 82801CA IDE U100
+ 248c 82801CAM ISA Bridge (LPC)
+ 24c0 82801DB ISA Bridge (LPC)
+ 24c2 82801DB USB (Hub #1)
+ 24c3 82801DB SMBus
+ 24c4 82801DB USB (Hub #2)
+ 24c5 82801DB AC'97 Audio
+ 24c6 82801DB AC'97 Modem
+ 24c7 82801DB USB (Hub #3)
+ 24cb 82801DB ICH4 IDE
+ 24cd 82801DB USB EHCI Controller
+ 2500 82820 820 (Camino) Chipset Host Bridge (MCH)
+ 1028 0095 Precision Workstation 220 Chipset
+ 1043 801c P3C-2000 system chipset
+ 2501 82820 820 (Camino) Chipset Host Bridge (MCH)
+ 1043 801c P3C-2000 system chipset
+ 250b 82820 820 (Camino) Chipset Host Bridge
+ 250f 82820 820 (Camino) Chipset AGP Bridge
+ 2520 82805AA MTH Memory Translator Hub
+ 2521 82804AA MRH-S Memory Repeater Hub for SDRAM
+ 2530 82850 850 (Tehama) Chipset Host Bridge (MCH)
+ 147b 0507 TH7II-RAID
+ 2531 82860 860 (Wombat) Chipset Host Bridge (MCH)
+ 2532 82850 850 (Tehama) Chipset AGP Bridge
+ 2533 82860 860 (Wombat) Chipset AGP Bridge
+ 2534 82860 860 (Wombat) Chipset PCI Bridge
+ 2540 e7500 [Plumas] DRAM Controller
+ 2541 e7500 [Plumas] DRAM Controller Error Reporting
+ 2543 e7500 [Plumas] HI_B Virtual PCI Bridge (F0)
+ 2544 e7500 [Plumas] HI_B Virtual PCI Bridge (F1)
+ 2545 e7500 [Plumas] HI_C Virtual PCI Bridge (F0)
+ 2546 e7500 [Plumas] HI_C Virtual PCI Bridge (F1)
+ 2547 e7500 [Plumas] HI_D Virtual PCI Bridge (F0)
+ 2548 e7500 [Plumas] HI_D Virtual PCI Bridge (F1)
+ 2560 82845G/GL [Brookdale-G] Chipset Host Bridge
+ 2561 82845G/GL [Brookdale-G] Chipset AGP Bridge
+ 2562 82845G/GL [Brookdale-G] Chipset Integrated Graphics Device
+ 3092 Integrated RAID
+ 3575 82830 830 Chipset Host Bridge
+ 1014 021d ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 3576 82830 830 Chipset AGP Bridge
+ 3577 82830 CGC [Chipset Graphics Controller]
+ 3578 82830 830 Chipset Host Bridge
+ 5200 EtherExpress PRO/100 Intelligent Server
+ 5201 EtherExpress PRO/100 Intelligent Server
+ 8086 0001 EtherExpress PRO/100 Server Ethernet Adapter
+ 530d 80310 IOP [IO Processor]
+ 7000 82371SB PIIX3 ISA [Natoma/Triton II]
+ 7010 82371SB PIIX3 IDE [Natoma/Triton II]
+ 7020 82371SB PIIX3 USB [Natoma/Triton II]
+ 7030 430VX - 82437VX TVX [Triton VX]
+ 7100 430TX - 82439TX MTXC
+ 7110 82371AB/EB/MB PIIX4 ISA
+ 7111 82371AB/EB/MB PIIX4 IDE
+ 7112 82371AB/EB/MB PIIX4 USB
+ 7113 82371AB/EB/MB PIIX4 ACPI
+ 7120 82810 GMCH [Graphics Memory Controller Hub]
+ 7121 82810 CGC [Chipset Graphics Controller]
+ 7122 82810 DC-100 GMCH [Graphics Memory Controller Hub]
+ 7123 82810 DC-100 CGC [Chipset Graphics Controller]
+ 7124 82810E DC-133 GMCH [Graphics Memory Controller Hub]
+ 7125 82810E DC-133 CGC [Chipset Graphics Controller]
+ 7126 82810 DC-133 System and Graphics Controller
+ 7128 82810-M DC-100 System and Graphics Controller
+ 712a 82810-M DC-133 System and Graphics Controller
+ 7180 440LX/EX - 82443LX/EX Host bridge
+ 7181 440LX/EX - 82443LX/EX AGP bridge
+ 7190 440BX/ZX/DX - 82443BX/ZX/DX Host bridge
+ 0e11 0500 Armada 1750 Laptop System Chipset
+ 1179 0001 Toshiba Tecra 8100 Laptop System Chipset
+ 7191 440BX/ZX/DX - 82443BX/ZX/DX AGP bridge
+ 7192 440BX/ZX/DX - 82443BX/ZX/DX Host bridge (AGP disabled)
+ 0e11 0460 Armada 1700 Laptop System Chipset
+ 7194 82440MX Host Bridge
+ 7195 82440MX AC'97 Audio Controller
+ 10cf 1099 QSound_SigmaTel Stac97 PCI Audio
+ 11d4 0040 SoundMAX Integrated Digital Audio
+ 11d4 0048 SoundMAX Integrated Digital Audio
+ 7196 82440MX AC'97 Modem Controller
+ 7198 82440MX ISA Bridge
+ 7199 82440MX EIDE Controller
+ 719a 82440MX USB Universal Host Controller
+ 719b 82440MX Power Management Controller
+ 71a0 440GX - 82443GX Host bridge
+ 71a1 440GX - 82443GX AGP bridge
+ 71a2 440GX - 82443GX Host bridge (AGP disabled)
+ 7600 82372FB PIIX5 ISA
+ 7601 82372FB PIIX5 IDE
+ 7602 82372FB PIIX5 USB
+ 7603 82372FB PIIX5 SMBus
+ 7800 i740
+ 003d 0008 Starfighter AGP
+ 003d 000b Starfighter AGP
+ 1092 0100 Stealth II G460
+ 10b4 201a Lightspeed 740
+ 10b4 202f Lightspeed 740
+ 8086 0000 Terminator 2x/i
+ 8086 0100 Intel740 Graphics Accelerator
+ 84c4 450KX/GX [Orion] - 82454KX/GX PCI bridge
+ 84c5 450KX/GX [Orion] - 82453KX/GX Memory controller
+ 84ca 450NX - 82451NX Memory & I/O Controller
+ 84cb 450NX - 82454NX/84460GX PCI Expander Bridge
+ 84e0 460GX - 84460GX System Address Controller (SAC)
+ 84e1 460GX - 84460GX System Data Controller (SDC)
+ 84e2 460GX - 84460GX AGP Bridge (GXB function 2)
+ 84e3 460GX - 84460GX Memory Address Controller (MAC)
+ 84e4 460GX - 84460GX Memory Data Controller (MDC)
+ 84e6 460GX - 82466GX Wide and fast PCI eXpander Bridge (WXB)
+ 84ea 460GX - 84460GX AGP Bridge (GXB function 1)
+ 9621 Integrated RAID
+ 9622 Integrated RAID
+ 9641 Integrated RAID
+ 96a1 Integrated RAID
+ b152 21152 PCI-to-PCI Bridge
+# observed, and documented in Intel revision note; new mask of 1011:0026
+ b154 21154 PCI-to-PCI Bridge
+ b555 21555 Non transparent PCI-to-PCI Bridge
+ e4bf 1000 CC8-1-BLUES
+ ffff 450NX/GX [Orion] - 82453KX/GX Memory controller [BUG]
+8800 Trigem Computer Inc.
+ 2008 Video assistent component
+8866 T-Square Design Inc.
+8888 Silicon Magic
+8e0e Computone Corporation
+8e2e KTI
+ 3000 ET32P2
+9004 Adaptec
+ 1078 AIC-7810
+ 1160 AIC-1160 [Family Fibre Channel Adapter]
+ 2178 AIC-7821
+ 3860 AHA-2930CU
+ 3b78 AHA-4844W/4844UW
+ 5075 AIC-755x
+ 5078 AHA-7850
+ 9004 7850 AHA-2904/Integrated AIC-7850
+ 5175 AIC-755x
+ 5178 AIC-7851
+ 5275 AIC-755x
+ 5278 AIC-7852
+ 5375 AIC-755x
+ 5378 AIC-7850
+ 5475 AIC-755x
+ 5478 AIC-7850
+ 5575 AVA-2930
+ 5578 AIC-7855
+ 5647 ANA-7711 TCP Offload Engine
+ 5675 AIC-755x
+ 5678 AIC-7856
+ 5775 AIC-755x
+ 5778 AIC-7850
+ 5800 AIC-5800
+ 5900 ANA-5910/5930/5940 ATM155 & 25 LAN Adapter
+ 5905 ANA-5910A/5930A/5940A ATM Adapter
+ 6038 AIC-3860
+ 6075 AIC-1480 / APA-1480
+ 9004 7560 AIC-1480 / APA-1480 Cardbus
+ 6078 AIC-7860
+ 6178 AIC-7861
+ 9004 7861 AHA-2940AU Single
+ 6278 AIC-7860
+ 6378 AIC-7860
+ 6478 AIC-786x
+ 6578 AIC-786x
+ 6678 AIC-786x
+ 6778 AIC-786x
+ 6915 ANA620xx/ANA69011A
+ 9004 0008 ANA69011A/TX 10/100
+ 9004 0009 ANA69011A/TX 10/100
+ 9004 0010 ANA62022 2-port 10/100
+ 9004 0018 ANA62044 4-port 10/100
+ 9004 0019 ANA62044 4-port 10/100
+ 9004 0020 ANA62022 2-port 10/100
+ 9004 0028 ANA69011A/TX 10/100
+ 9004 8008 ANA69011A/TX 64 bit 10/100
+ 9004 8009 ANA69011A/TX 64 bit 10/100
+ 9004 8010 ANA62022 2-port 64 bit 10/100
+ 9004 8018 ANA62044 4-port 64 bit 10/100
+ 9004 8019 ANA62044 4-port 64 bit 10/100
+ 9004 8020 ANA62022 2-port 64 bit 10/100
+ 9004 8028 ANA69011A/TX 64 bit 10/100
+ 7078 AHA-294x / AIC-7870
+ 7178 AHA-2940/2940W / AIC-7871
+ 7278 AHA-3940/3940W / AIC-7872
+ 7378 AHA-3985 / AIC-7873
+ 7478 AHA-2944/2944W / AIC-7874
+ 7578 AHA-3944/3944W / AIC-7875
+ 7678 AHA-4944W/UW / AIC-7876
+ 7778 AIC-787x
+ 7810 AIC-7810
+ 7815 AIC-7815 RAID+Memory Controller IC
+ 9004 7815 ARO-1130U2 RAID Controller
+ 9004 7840 AIC-7815 RAID+Memory Controller IC
+ 7850 AIC-7850
+ 7855 AHA-2930
+ 7860 AIC-7860
+ 7870 AIC-7870
+ 7871 AHA-2940
+ 7872 AHA-3940
+ 7873 AHA-3980
+ 7874 AHA-2944
+ 7880 AIC-7880P
+ 7890 AIC-7890
+ 7891 AIC-789x
+ 7892 AIC-789x
+ 7893 AIC-789x
+ 7894 AIC-789x
+ 7895 AHA-2940U/UW / AHA-39xx / AIC-7895
+ 9004 7890 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
+ 9004 7891 AHA-2940U/2940UW Dual
+ 9004 7892 AHA-3940AU/AUW/AUWD/UWD
+ 9004 7894 AHA-3944AUWD
+ 9004 7895 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
+ 9004 7896 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
+ 9004 7897 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
+ 7896 AIC-789x
+ 7897 AIC-789x
+ 8078 AIC-7880U
+ 9004 7880 AIC-7880P Ultra/Ultra Wide SCSI Chipset
+ 8178 AHA-2940U/UW/D / AIC-7881U
+ 9004 7881 AHA-2940UW SCSI Host Adapter
+ 8278 AHA-3940U/UW/UWD / AIC-7882U
+ 8378 AHA-3940U/UW / AIC-7883U
+ 8478 AHA-2944UW / AIC-7884U
+ 8578 AHA-3944U/UWD / AIC-7885
+ 8678 AHA-4944UW / AIC-7886
+ 8778 AHA-2940UW Pro / AIC-788x
+ 9004 7887 2940UW Pro Ultra-Wide SCSI Controller
+ 8878 AHA-2930UW / AIC-7888
+ 9004 7888 AHA-2930UW SCSI Controller
+ 8b78 ABA-1030
+ ec78 AHA-4944W/UW
+9005 Adaptec
+ 0010 AHA-2940U2/U2W
+ 9005 2180 AHA-2940U2 SCSI Controller
+ 9005 8100 AHA-2940U2B SCSI Controller
+ 9005 a180 AHA-2940U2W SCSI Controller
+ 9005 e100 AHA-2950U2B SCSI Controller
+ 0011 AHA-2930U2
+ 0013 78902
+ 9005 0003 AAA-131U2 Array1000 1 Channel RAID Controller
+ 001f AHA-2940U2/U2W / 7890/7891
+ 9005 000f 2940U2W SCSI Controller
+ 9005 a180 2940U2W SCSI Controller
+ 0020 AIC-7890
+ 002f AIC-7890
+ 0030 AIC-7890
+ 003f AIC-7890
+ 0050 AHA-3940U2x/395U2x
+ 9005 f500 AHA-3950U2B
+ 0051 AHA-3950U2D
+ 9005 b500 AHA-3950U2D
+ 0053 AIC-7896 SCSI Controller
+ 9005 ffff AIC-7896 SCSI Controller mainboard implementation
+ 005f AIC-7896U2/7897U2
+ 0080 AIC-7892A U160/m
+ 0e11 e2a0 Compaq 64-Bit/66MHz Wide Ultra3 SCSI Adapter
+ 9005 62a0 29160N Ultra160 SCSI Controller
+ 9005 e220 29160LP Low Profile Ultra160 SCSI Controller
+ 9005 e2a0 29160 Ultra160 SCSI Controller
+ 0081 AIC-7892B U160/m
+ 9005 62a1 19160 Ultra160 SCSI Controller
+ 0083 AIC-7892D U160/m
+ 008f AIC-7892P U160/m
+ 00c0 AHA-3960D / AIC-7899A U160/m
+ 0e11 f620 Compaq 64-Bit/66MHz Dual Channel Wide Ultra3 SCSI Adapter
+ 9005 f620 AHA-3960D U160/m
+ 00c1 AIC-7899B U160/m
+ 00c3 AIC-7899D U160/m
+ 00c5 RAID subsystem HBA
+ 00cf AIC-7899P U160/m
+ 0285 AAC-RAID
+ 1028 0287 PowerEdge Expandable RAID Controller 320/DC
+907f Atronics
+ 2015 IDE-2015PL
+919a Gigapixel Corp
+9412 Holtek
+ 6565 6565
+9699 Omni Media Technology Inc
+ 6565 6565
+9710 NetMos Technology
+ 9815 VScom 021H-EP2 2 port parallel adaptor
+ 9835 222N-2 I/O Card (2S+1P)
+a0a0 AOPEN Inc.
+a0f1 UNISYS Corporation
+a200 NEC Corporation
+a259 Hewlett Packard
+a25b Hewlett Packard GmbH PL24-MKT
+a304 Sony
+a727 3Com Corporation
+aa42 Scitex Digital Video
+ac1e Digital Receiver Technology Inc
+b1b3 Shiva Europe Limited
+c001 TSI Telsys
+c0a9 Micron/Crucial Technology
+c0de Motorola
+c0fe Motion Engineering, Inc.
+ca50 Varian Australia Pty Ltd
+cafe Chrysalis-ITS
+cccc Catapult Communications
+d4d4 Dy4 Systems Inc
+ 0601 PCI Mezzanine Card
+d531 I+ME ACTIA GmbH
+d84d Exsys
+dead Indigita Corporation
+e000 Winbond
+ e000 W89C940
+e159 Tiger Jet Network Inc.
+ 0001 Model 300 128k
+ 0059 0001 128k ISDN-S/T Adapter
+ 0059 0003 128k ISDN-U Adapter
+ 0002 Tiger100APC ISDN chipset
+e4bf EKF Elektronik GmbH
+ea01 Eagle Technology
+eabb Aashima Technology B.V.
+eace Endace Measurement Systems, Ltd
+ 3100 DAG 3.10 OC-3/OC-12
+ 3200 DAG 3.2x OC-3/OC-12
+ 320e DAG 3.2E Fast Ethernet
+ 340e DAG 3.4E Fast Ethernet
+ 341e DAG 3.41E Fast Ethernet
+ 3500 DAG 3.5 OC-3/OC-12
+ 351c DAG 3.5ECM Fast Ethernet
+ 4100 DAG 4.10 OC-48
+ 4110 DAG 4.11 OC-48
+ 4220 DAG 4.2 OC-48
+ 422e DAG 4.2E Dual Gigabit Ethernet
+ec80 Belkin Corporation
+ ec00 F5D6000
+ecc0 Echo Corporation
+edd8 ARK Logic Inc
+ a091 1000PV [Stingray]
+ a099 2000PV [Stingray]
+ a0a1 2000MT
+ a0a9 2000MI
+fa57 Fast Search & Transfer ASA
+febd Ultraview Corp.
+feda Epigram Inc
+fffe VMWare Inc
+ 0710 Virtual SVGA
+ffff Illegal Vendor ID
+
+
+# List of known device classes, subclasses and programming interfaces
+
+# Syntax:
+# C class class_name
+# subclass subclass_name <-- single tab
+# prog-if prog-if_name <-- two tabs
+
+C 00 Unclassified device
+ 00 Non-VGA unclassified device
+ 01 VGA compatible unclassified device
+C 01 Mass storage controller
+ 00 SCSI storage controller
+ 01 IDE interface
+ 02 Floppy disk controller
+ 03 IPI bus controller
+ 04 RAID bus controller
+ 80 Unknown mass storage controller
+C 02 Network controller
+ 00 Ethernet controller
+ 01 Token ring network controller
+ 02 FDDI network controller
+ 03 ATM network controller
+ 04 ISDN controller
+ 80 Network controller
+C 03 Display controller
+ 00 VGA compatible controller
+ 00 VGA
+ 01 8514
+ 01 XGA compatible controller
+ 02 3D controller
+ 80 Display controller
+C 04 Multimedia controller
+ 00 Multimedia video controller
+ 01 Multimedia audio controller
+ 02 Computer telephony device
+ 80 Multimedia controller
+C 05 Memory controller
+ 00 RAM memory
+ 01 FLASH memory
+ 80 Memory controller
+C 06 Bridge
+ 00 Host bridge
+ 01 ISA bridge
+ 02 EISA bridge
+ 03 MicroChannel bridge
+ 04 PCI bridge
+ 00 Normal decode
+ 01 Subtractive decode
+ 05 PCMCIA bridge
+ 06 NuBus bridge
+ 07 CardBus bridge
+ 08 RACEway bridge
+ 00 Transparent mode
+ 01 Endpoint mode
+ 09 Semi-transparent PCI-to-PCI bridge
+ 40 Primary bus towards host CPU
+ 80 Secondary bus towards host CPU
+ 0a InfiniBand to PCI host bridge
+ 80 Bridge
+C 07 Communication controller
+ 00 Serial controller
+ 00 8250
+ 01 16450
+ 02 16550
+ 03 16650
+ 04 16750
+ 05 16850
+ 06 16950
+ 01 Parallel controller
+ 00 SPP
+ 01 BiDir
+ 02 ECP
+ 03 IEEE1284
+ fe IEEE1284 Target
+ 02 Multiport serial controller
+ 03 Modem
+ 00 Generic
+ 01 Hayes/16450
+ 02 Hayes/16550
+ 03 Hayes/16650
+ 04 Hayes/16750
+ 80 Communication controller
+C 08 Generic system peripheral
+ 00 PIC
+ 00 8259
+ 01 ISA PIC
+ 02 EISA PIC
+ 10 IO-APIC
+ 20 IO(X)-APIC
+ 01 DMA controller
+ 00 8237
+ 01 ISA DMA
+ 02 EISA DMA
+ 02 Timer
+ 00 8254
+ 01 ISA Timer
+ 02 EISA Timers
+ 03 RTC
+ 00 Generic
+ 01 ISA RTC
+ 04 PCI Hot-plug controller
+ 80 System peripheral
+C 09 Input device controller
+ 00 Keyboard controller
+ 01 Digitizer Pen
+ 02 Mouse controller
+ 03 Scanner controller
+ 04 Gameport controller
+ 00 Generic
+ 10 Extended
+ 80 Input device controller
+C 0a Docking station
+ 00 Generic Docking Station
+ 80 Docking Station
+C 0b Processor
+ 00 386
+ 01 486
+ 02 Pentium
+ 10 Alpha
+ 20 Power PC
+ 30 MIPS
+ 40 Co-processor
+C 0c Serial bus controller
+ 00 FireWire (IEEE 1394)
+ 00 Generic
+ 10 OHCI
+ 01 ACCESS Bus
+ 02 SSA
+ 03 USB Controller
+ 00 UHCI
+ 10 OHCI
+ 20 EHCI
+ 80 Unspecified
+ fe USB Device
+ 04 Fibre Channel
+ 05 SMBus
+ 06 InfiniBand
+C 0d Wireless controller
+ 00 IRDA controller
+ 01 Consumer IR controller
+ 10 RF controller
+ 80 Wireless controller
+C 0e Intelligent controller
+ 00 I2O
+C 0f Satellite communications controller
+ 00 Satellite TV controller
+ 01 Satellite audio communication controller
+ 03 Satellite voice communication controller
+ 04 Satellite data communication controller
+C 10 Encryption controller
+ 00 Network and computing encryption device
+ 10 Entertainment encryption device
+ 80 Encryption controller
+C 11 Signal processing controller
+ 00 DPIO module
+ 01 Performance counters
+ 10 Communication synchronizer
+ 80 Signal processing controller
diff --git a/xen/drivers/pci/proc.c b/xen/drivers/pci/proc.c
new file mode 100644
index 0000000000..5e04ad7b33
--- /dev/null
+++ b/xen/drivers/pci/proc.c
@@ -0,0 +1,572 @@
+/*
+ * $Id: proc.c,v 1.13 1998/05/12 07:36:07 mj Exp $
+ *
+ * Procfs interface for the PCI bus.
+ *
+ * Copyright (c) 1997--1999 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+
+#define PCI_CFG_SPACE_SIZE 256
+
+static loff_t
+proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
+{
+ loff_t new;
+
+	switch (whence) {
+	case 0:	/* SEEK_SET: absolute offset into config space */
+		new = off;
+		break;
+	case 1:	/* SEEK_CUR: relative to the current position */
+		new = file->f_pos + off;
+		break;
+	case 2:	/* SEEK_END: relative to the end of config space */
+		new = PCI_CFG_SPACE_SIZE + off;
+		break;
+	default:
+		return -EINVAL;
+	}
+ if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+ return -EINVAL;
+ return (file->f_pos = new);
+}
+
+static ssize_t
+proc_bus_pci_read(struct file *file, char *buf, size_t nbytes, loff_t *ppos)
+{
+ const struct inode *ino = file->f_dentry->d_inode;
+ const struct proc_dir_entry *dp = ino->u.generic_ip;
+ struct pci_dev *dev = dp->data;
+ unsigned int pos = *ppos;
+ unsigned int cnt, size;
+
+ /*
+ * Normal users can read only the standardized portion of the
+ * configuration space as several chips lock up when trying to read
+ * undefined locations (think of Intel PIIX4 as a typical example).
+ */
+
+ if (capable(CAP_SYS_ADMIN))
+ size = PCI_CFG_SPACE_SIZE;
+ else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ size = 128;
+ else
+ size = 64;
+
+ if (pos >= size)
+ return 0;
+ if (nbytes >= size)
+ nbytes = size;
+ if (pos + nbytes > size)
+ nbytes = size - pos;
+ cnt = nbytes;
+
+ if (!access_ok(VERIFY_WRITE, buf, cnt))
+ return -EINVAL;
+
+ if ((pos & 1) && cnt) {
+ unsigned char val;
+ pci_read_config_byte(dev, pos, &val);
+ __put_user(val, buf);
+ buf++;
+ pos++;
+ cnt--;
+ }
+
+ if ((pos & 3) && cnt > 2) {
+ unsigned short val;
+ pci_read_config_word(dev, pos, &val);
+ __put_user(cpu_to_le16(val), (unsigned short *) buf);
+ buf += 2;
+ pos += 2;
+ cnt -= 2;
+ }
+
+ while (cnt >= 4) {
+ unsigned int val;
+ pci_read_config_dword(dev, pos, &val);
+ __put_user(cpu_to_le32(val), (unsigned int *) buf);
+ buf += 4;
+ pos += 4;
+ cnt -= 4;
+ }
+
+ if (cnt >= 2) {
+ unsigned short val;
+ pci_read_config_word(dev, pos, &val);
+ __put_user(cpu_to_le16(val), (unsigned short *) buf);
+ buf += 2;
+ pos += 2;
+ cnt -= 2;
+ }
+
+ if (cnt) {
+ unsigned char val;
+ pci_read_config_byte(dev, pos, &val);
+ __put_user(val, buf);
+ buf++;
+ pos++;
+ cnt--;
+ }
+
+ *ppos = pos;
+ return nbytes;
+}
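For orientation, here is a brief user-space sketch of what this read path serves; the write path below is symmetric. This is an illustration only: the /proc/bus/pci/<bus>/<slot>.<func> path layout is the conventional one, the bus and slot numbers are hypothetical, and error handling is trimmed. The first two little-endian 16-bit words of configuration space are the vendor and device IDs catalogued in the list above.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char cfg[4];
	/* Hypothetical example device: bus 00, slot 00, function 0. */
	int fd = open("/proc/bus/pci/00/00.0", O_RDONLY);

	if (fd < 0 || read(fd, cfg, 4) != 4)
		return 1;
	/* Config space is exported little-endian (cpu_to_le16 above). */
	printf("vendor %02x%02x device %02x%02x\n",
	       cfg[1], cfg[0], cfg[3], cfg[2]);
	close(fd);
	return 0;
}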
+
+static ssize_t
+proc_bus_pci_write(struct file *file, const char *buf, size_t nbytes, loff_t *ppos)
+{
+ const struct inode *ino = file->f_dentry->d_inode;
+ const struct proc_dir_entry *dp = ino->u.generic_ip;
+ struct pci_dev *dev = dp->data;
+ int pos = *ppos;
+ int cnt;
+
+ if (pos >= PCI_CFG_SPACE_SIZE)
+ return 0;
+ if (nbytes >= PCI_CFG_SPACE_SIZE)
+ nbytes = PCI_CFG_SPACE_SIZE;
+ if (pos + nbytes > PCI_CFG_SPACE_SIZE)
+ nbytes = PCI_CFG_SPACE_SIZE - pos;
+ cnt = nbytes;
+
+ if (!access_ok(VERIFY_READ, buf, cnt))
+ return -EINVAL;
+
+ if ((pos & 1) && cnt) {
+ unsigned char val;
+ __get_user(val, buf);
+ pci_write_config_byte(dev, pos, val);
+ buf++;
+ pos++;
+ cnt--;
+ }
+
+ if ((pos & 3) && cnt > 2) {
+ unsigned short val;
+ __get_user(val, (unsigned short *) buf);
+ pci_write_config_word(dev, pos, le16_to_cpu(val));
+ buf += 2;
+ pos += 2;
+ cnt -= 2;
+ }
+
+ while (cnt >= 4) {
+ unsigned int val;
+ __get_user(val, (unsigned int *) buf);
+ pci_write_config_dword(dev, pos, le32_to_cpu(val));
+ buf += 4;
+ pos += 4;
+ cnt -= 4;
+ }
+
+ if (cnt >= 2) {
+ unsigned short val;
+ __get_user(val, (unsigned short *) buf);
+ pci_write_config_word(dev, pos, le16_to_cpu(val));
+ buf += 2;
+ pos += 2;
+ cnt -= 2;
+ }
+
+ if (cnt) {
+ unsigned char val;
+ __get_user(val, buf);
+ pci_write_config_byte(dev, pos, val);
+ buf++;
+ pos++;
+ cnt--;
+ }
+
+ *ppos = pos;
+ return nbytes;
+}
+
+struct pci_filp_private {
+ enum pci_mmap_state mmap_state;
+ int write_combine;
+};
+
+static int proc_bus_pci_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ const struct proc_dir_entry *dp = inode->u.generic_ip;
+ struct pci_dev *dev = dp->data;
+#ifdef HAVE_PCI_MMAP
+ struct pci_filp_private *fpriv = file->private_data;
+#endif /* HAVE_PCI_MMAP */
+ int ret = 0;
+
+ switch (cmd) {
+ case PCIIOC_CONTROLLER:
+ ret = pci_controller_num(dev);
+ break;
+
+#ifdef HAVE_PCI_MMAP
+ case PCIIOC_MMAP_IS_IO:
+ fpriv->mmap_state = pci_mmap_io;
+ break;
+
+ case PCIIOC_MMAP_IS_MEM:
+ fpriv->mmap_state = pci_mmap_mem;
+ break;
+
+ case PCIIOC_WRITE_COMBINE:
+ if (arg)
+ fpriv->write_combine = 1;
+ else
+ fpriv->write_combine = 0;
+ break;
+
+#endif /* HAVE_PCI_MMAP */
+
+ default:
+ ret = -EINVAL;
+ break;
+	}
+
+ return ret;
+}
+
+#ifdef HAVE_PCI_MMAP
+static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ const struct proc_dir_entry *dp = inode->u.generic_ip;
+ struct pci_dev *dev = dp->data;
+ struct pci_filp_private *fpriv = file->private_data;
+ int ret;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ ret = pci_mmap_page_range(dev, vma,
+ fpriv->mmap_state,
+ fpriv->write_combine);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int proc_bus_pci_open(struct inode *inode, struct file *file)
+{
+ struct pci_filp_private *fpriv = kmalloc(sizeof(*fpriv), GFP_KERNEL);
+
+ if (!fpriv)
+ return -ENOMEM;
+
+ fpriv->mmap_state = pci_mmap_io;
+ fpriv->write_combine = 0;
+
+ file->private_data = fpriv;
+
+ return 0;
+}
+
+static int proc_bus_pci_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ file->private_data = NULL;
+
+ return 0;
+}
+#endif /* HAVE_PCI_MMAP */
+
+static struct file_operations proc_bus_pci_operations = {
+ llseek: proc_bus_pci_lseek,
+ read: proc_bus_pci_read,
+ write: proc_bus_pci_write,
+ ioctl: proc_bus_pci_ioctl,
+#ifdef HAVE_PCI_MMAP
+ open: proc_bus_pci_open,
+ release: proc_bus_pci_release,
+ mmap: proc_bus_pci_mmap,
+#ifdef HAVE_ARCH_PCI_GET_UNMAPPED_AREA
+ get_unmapped_area: get_pci_unmapped_area,
+#endif /* HAVE_ARCH_PCI_GET_UNMAPPED_AREA */
+#endif /* HAVE_PCI_MMAP */
+};
+
+#if BITS_PER_LONG == 32
+#define LONG_FORMAT "\t%08lx"
+#else
+#define LONG_FORMAT "\t%16lx"
+#endif
+
+/* iterator */
+static void *pci_seq_start(struct seq_file *m, loff_t *pos)
+{
+ struct list_head *p = &pci_devices;
+ loff_t n = *pos;
+
+ /* XXX: surely we need some locking for traversing the list? */
+ while (n--) {
+ p = p->next;
+ if (p == &pci_devices)
+ return NULL;
+ }
+ return p;
+}
+static void *pci_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct list_head *p = v;
+ (*pos)++;
+ return p->next != &pci_devices ? p->next : NULL;
+}
+static void pci_seq_stop(struct seq_file *m, void *v)
+{
+ /* release whatever locks we need */
+}
+
+static int show_device(struct seq_file *m, void *v)
+{
+ struct list_head *p = v;
+ const struct pci_dev *dev;
+ const struct pci_driver *drv;
+ int i;
+
+ if (p == &pci_devices)
+ return 0;
+
+ dev = pci_dev_g(p);
+ drv = pci_dev_driver(dev);
+ seq_printf(m, "%02x%02x\t%04x%04x\t%x",
+ dev->bus->number,
+ dev->devfn,
+ dev->vendor,
+ dev->device,
+ dev->irq);
+	/* This must be 7, not PCI_NUM_RESOURCES, to preserve compatibility of the output format */
+ for(i=0; i<7; i++)
+ seq_printf(m, LONG_FORMAT,
+ dev->resource[i].start |
+ (dev->resource[i].flags & PCI_REGION_FLAG_MASK));
+ for(i=0; i<7; i++)
+ seq_printf(m, LONG_FORMAT,
+ dev->resource[i].start < dev->resource[i].end ?
+ dev->resource[i].end - dev->resource[i].start + 1 : 0);
+ seq_putc(m, '\t');
+ if (drv)
+ seq_printf(m, "%s", drv->name);
+ seq_putc(m, '\n');
+ return 0;
+}
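+
+/*
+ * Each line of /proc/bus/pci/devices thus carries, tab-separated:
+ * bus+devfn, vendor+device ID, IRQ, seven resource base addresses
+ * (ORed with their region flags), seven resource sizes, and the name
+ * of the bound driver, if any.
+ */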
+
+static struct seq_operations proc_bus_pci_devices_op = {
+ start: pci_seq_start,
+ next: pci_seq_next,
+ stop: pci_seq_stop,
+ show: show_device
+};
+
+struct proc_dir_entry *proc_bus_pci_dir;
+
+int pci_proc_attach_device(struct pci_dev *dev)
+{
+ struct pci_bus *bus = dev->bus;
+ struct proc_dir_entry *de, *e;
+ char name[16];
+
+ if (!(de = bus->procdir)) {
+ sprintf(name, "%02x", bus->number);
+ de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
+ if (!de)
+ return -ENOMEM;
+ }
+ sprintf(name, "%02x.%x", PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ e = dev->procent = create_proc_entry(name, S_IFREG | S_IRUGO | S_IWUSR, de);
+ if (!e)
+ return -ENOMEM;
+ e->proc_fops = &proc_bus_pci_operations;
+ e->data = dev;
+ e->size = PCI_CFG_SPACE_SIZE;
+ return 0;
+}
+
+int pci_proc_detach_device(struct pci_dev *dev)
+{
+ struct proc_dir_entry *e;
+
+ if ((e = dev->procent)) {
+ if (atomic_read(&e->count))
+ return -EBUSY;
+ remove_proc_entry(e->name, dev->bus->procdir);
+ dev->procent = NULL;
+ }
+ return 0;
+}
+
+int pci_proc_attach_bus(struct pci_bus* bus)
+{
+ struct proc_dir_entry *de = bus->procdir;
+
+ if (!de) {
+ char name[16];
+ sprintf(name, "%02x", bus->number);
+ de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
+ if (!de)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int pci_proc_detach_bus(struct pci_bus* bus)
+{
+ struct proc_dir_entry *de = bus->procdir;
+ if (de)
+ remove_proc_entry(de->name, proc_bus_pci_dir);
+ return 0;
+}
+
+
+/*
+ * Backward compatible /proc/pci interface.
+ */
+
+/*
+ * Print a human-readable summary of the configuration space registers
+ * of a device (possibly several lines each) into the seq_file.
+ */
+static int show_dev_config(struct seq_file *m, void *v)
+{
+ struct list_head *p = v;
+ struct pci_dev *dev;
+ struct pci_driver *drv;
+ u32 class_rev;
+ unsigned char latency, min_gnt, max_lat, *class;
+ int reg;
+
+ if (p == &pci_devices) {
+ seq_puts(m, "PCI devices found:\n");
+ return 0;
+ }
+
+ dev = pci_dev_g(p);
+ drv = pci_dev_driver(dev);
+
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
+ pci_read_config_byte (dev, PCI_LATENCY_TIMER, &latency);
+ pci_read_config_byte (dev, PCI_MIN_GNT, &min_gnt);
+ pci_read_config_byte (dev, PCI_MAX_LAT, &max_lat);
+ seq_printf(m, " Bus %2d, device %3d, function %2d:\n",
+ dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ class = pci_class_name(class_rev >> 16);
+ if (class)
+ seq_printf(m, " %s", class);
+ else
+ seq_printf(m, " Class %04x", class_rev >> 16);
+ seq_printf(m, ": %s (rev %d).\n", dev->name, class_rev & 0xff);
+
+ if (dev->irq)
+ seq_printf(m, " IRQ %d.\n", dev->irq);
+
+ if (latency || min_gnt || max_lat) {
+ seq_printf(m, " Master Capable. ");
+ if (latency)
+ seq_printf(m, "Latency=%d. ", latency);
+ else
+ seq_puts(m, "No bursts. ");
+ if (min_gnt)
+ seq_printf(m, "Min Gnt=%d.", min_gnt);
+ if (max_lat)
+ seq_printf(m, "Max Lat=%d.", max_lat);
+ seq_putc(m, '\n');
+ }
+
+ for (reg = 0; reg < 6; reg++) {
+ struct resource *res = dev->resource + reg;
+ unsigned long base, end, flags;
+
+ base = res->start;
+ end = res->end;
+ flags = res->flags;
+ if (!end)
+ continue;
+
+ if (flags & PCI_BASE_ADDRESS_SPACE_IO) {
+ seq_printf(m, " I/O at 0x%lx [0x%lx].\n",
+ base, end);
+ } else {
+ const char *pref, *type = "unknown";
+
+ if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
+ pref = "P";
+ else
+ pref = "Non-p";
+ switch (flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK) {
+ case PCI_BASE_ADDRESS_MEM_TYPE_32:
+ type = "32 bit"; break;
+ case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+ type = "20 bit"; break;
+ case PCI_BASE_ADDRESS_MEM_TYPE_64:
+ type = "64 bit"; break;
+ }
+ seq_printf(m, " %srefetchable %s memory at "
+ "0x%lx [0x%lx].\n", pref, type,
+ base,
+ end);
+ }
+ }
+ return 0;
+}
+
+static struct seq_operations proc_pci_op = {
+ start: pci_seq_start,
+ next: pci_seq_next,
+ stop: pci_seq_stop,
+ show: show_dev_config
+};
+
+static int proc_bus_pci_dev_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &proc_bus_pci_devices_op);
+}
+static struct file_operations proc_bus_pci_dev_operations = {
+ open: proc_bus_pci_dev_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
+};
+static int proc_pci_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &proc_pci_op);
+}
+static struct file_operations proc_pci_operations = {
+ open: proc_pci_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
+};
+
+static int __init pci_proc_init(void)
+{
+ if (pci_present()) {
+ struct proc_dir_entry *entry;
+ struct pci_dev *dev;
+ proc_bus_pci_dir = proc_mkdir("pci", proc_bus);
+ entry = create_proc_entry("devices", 0, proc_bus_pci_dir);
+ if (entry)
+ entry->proc_fops = &proc_bus_pci_dev_operations;
+ pci_for_each_dev(dev) {
+ pci_proc_attach_device(dev);
+ }
+ entry = create_proc_entry("pci", 0, NULL);
+ if (entry)
+ entry->proc_fops = &proc_pci_operations;
+ }
+ return 0;
+}
+
+__initcall(pci_proc_init);
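+
+/*
+ * A config register can then be read from userspace by seeking within
+ * the per-device file, e.g. (with hypothetical bus/slot numbers):
+ *
+ *	int fd = open("/proc/bus/pci/00/0c.0", O_RDONLY);
+ *	unsigned char irq_line;
+ *	lseek(fd, PCI_INTERRUPT_LINE, SEEK_SET);
+ *	read(fd, &irq_line, 1);
+ */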
diff --git a/xen/drivers/pci/quirks.c b/xen/drivers/pci/quirks.c
new file mode 100644
index 0000000000..54e3e974d3
--- /dev/null
+++ b/xen/drivers/pci/quirks.c
@@ -0,0 +1,666 @@
+/*
+ * $Id: quirks.c,v 1.5 1998/05/02 19:24:14 mj Exp $
+ *
+ * This file contains work-arounds for many known PCI hardware
+ * bugs. Devices present only on certain architectures (host
+ * bridges et cetera) should be handled in arch-specific code.
+ *
+ * Copyright (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * The bridge optimization stuff has been removed. If you really
+ * have a silly BIOS which is unable to set your host bridge right,
+ * use the PowerTweak utility (see http://powertweak.sourceforge.net).
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+
+#undef DEBUG
+
+/* Deal with broken BIOSes that neglect to enable passive release,
+ which can cause problems in combination with the 82441FX/PPro MTRRs */
+static void __init quirk_passive_release(struct pci_dev *dev)
+{
+ struct pci_dev *d = NULL;
+ unsigned char dlc;
+
+ /* We have to make sure a particular bit is set in the PIIX3
+ ISA bridge, so we have to go out and find it. */
+ while ((d = pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, d))) {
+ pci_read_config_byte(d, 0x82, &dlc);
+ if (!(dlc & 1<<1)) {
+ printk(KERN_ERR "PCI: PIIX3: Enabling Passive Release on %s\n", d->slot_name);
+ dlc |= 1<<1;
+ pci_write_config_byte(d, 0x82, dlc);
+ }
+ }
+}
+
+/* The VIA VP2/VP3/MVP3 seem to have some 'features'. There may be a workaround
+ but VIA don't answer queries. If you happen to have good contacts at VIA
+ ask them for me please -- Alan
+
+   This appears to be BIOS-dependent rather than version-dependent, so
+   presumably there is a chipset-level fix. */
+
+
+int isa_dma_bridge_buggy; /* Exported */
+
+static void __init quirk_isa_dma_hangs(struct pci_dev *dev)
+{
+ if (!isa_dma_bridge_buggy) {
+ isa_dma_bridge_buggy=1;
+ printk(KERN_INFO "Activating ISA DMA hang workarounds.\n");
+ }
+}
+
+int pci_pci_problems;
+
+/*
+ * Chipsets where PCI->PCI transfers vanish or hang
+ */
+
+static void __init quirk_nopcipci(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_FAIL)==0)
+ {
+ printk(KERN_INFO "Disabling direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_FAIL;
+ }
+}
+
+/*
+ * Triton requires workarounds to be used by the drivers
+ */
+
+static void __init quirk_triton(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_TRITON)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_TRITON;
+ }
+}
+
+/*
+ * VIA Apollo KT133 needs PCI latency patch
+ * Based on a Windows driver patch by George E. Breese; see
+ * "PCI Latency Adjust" on http://www.viahardware.com/download/viatweak.shtm
+ * Also see http://www.au-ja.org/review-kt133a-1-en.phtml for the info on
+ * which Mr Breese based his work.
+ *
+ * Updated based on further information from the site and also on
+ * information provided by VIA
+ */
+static void __init quirk_vialatency(struct pci_dev *dev)
+{
+ struct pci_dev *p;
+ u8 rev;
+ u8 busarb;
+ /* Ok we have a potential problem chipset here. Now see if we have
+ a buggy southbridge */
+
+ p=pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL);
+ if(p!=NULL)
+ {
+ pci_read_config_byte(p, PCI_CLASS_REVISION, &rev);
+ /* 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A; thanks Dan Hollis */
+ /* Check for buggy part revisions */
+ if (rev < 0x40 || rev > 0x42)
+ return;
+ }
+ else
+ {
+ p = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL);
+ if(p==NULL) /* No problem parts */
+ return;
+ pci_read_config_byte(p, PCI_CLASS_REVISION, &rev);
+ /* Check for buggy part revisions */
+ if (rev < 0x10 || rev > 0x12)
+ return;
+ }
+
+ /*
+ * Ok we have the problem. Now set the PCI master grant to
+	 * occur on every master grant. The apparent bug is that under high
+	 * PCI load (quite common in Linux of course) you can get data
+	 * loss when the CPU is held off the bus for 3 bus master requests.
+	 * This happens to include the IDE controllers....
+ *
+	 * VIA only apply this fix when an SB Live! is present, but under
+	 * both Linux and Windows this isn't enough, and we have seen
+	 * corruption without an SB Live! but with things like 3 UDMA IDE
+	 * controllers. So we ignore that bit of the VIA recommendation.
+ */
+
+ pci_read_config_byte(dev, 0x76, &busarb);
+	/* Set bits 5:4 of byte 0x76 to 0b01:
+	   "Master priority rotation on every PCI master grant" */
+ busarb &= ~(1<<5);
+ busarb |= (1<<4);
+ pci_write_config_byte(dev, 0x76, busarb);
+ printk(KERN_INFO "Applying VIA southbridge workaround.\n");
+}
+
+/*
+ * VIA Apollo VP3 needs ETBF on BT848/878
+ */
+
+static void __init quirk_viaetbf(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_VIAETBF)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_VIAETBF;
+ }
+}
+static void __init quirk_vsfx(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_VSFX)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_VSFX;
+ }
+}
+
+/*
+ * Ali Magik requires workarounds to be used by the drivers
+ * that DMA to AGP space. Latency must be set to 0xA and the Triton
+ * workaround applied as well.
+ * [Info kindly provided by ALi]
+ */
+
+static void __init quirk_alimagik(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_ALIMAGIK)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_ALIMAGIK|PCIPCI_TRITON;
+ }
+}
+
+/*
+ * Natoma has some interesting boundary conditions with Zoran stuff
+ * at least
+ */
+
+static void __init quirk_natoma(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_NATOMA)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_NATOMA;
+ }
+}
+
+/*
+ * S3 868 and 968 chips report region size equal to 32M, but they decode 64M.
+ * If it's needed, re-allocate the region.
+ */
+
+static void __init quirk_s3_64M(struct pci_dev *dev)
+{
+ struct resource *r = &dev->resource[0];
+
+ if ((r->start & 0x3ffffff) || r->end != r->start + 0x3ffffff) {
+ r->start = 0;
+ r->end = 0x3ffffff;
+ }
+}
+
+static void __init quirk_io_region(struct pci_dev *dev, unsigned region, unsigned size, int nr)
+{
+ region &= ~(size-1);
+ if (region) {
+ struct resource *res = dev->resource + nr;
+
+ res->name = dev->name;
+ res->start = region;
+ res->end = region + size - 1;
+ res->flags = IORESOURCE_IO;
+ pci_claim_resource(dev, nr);
+ }
+}
+
+/*
+ * The ATI northbridge raises a machine check on the processor if you
+ * even read anywhere between 0x3b0 and 0x3bb, or read 0x3d3.
+ */
+
+static void __devinit quirk_ati_exploding_mce(struct pci_dev *dev)
+{
+ printk(KERN_INFO "ATI Northbridge, reserving I/O ports 0x3b0 to 0x3bb.\n");
+	/* We must not look at these I/O locations */
+ request_region(0x3b0, 0x0C, "RadeonIGP");
+ request_region(0x3d3, 0x01, "RadeonIGP");
+}
+
+/*
+ * Let's make the southbridge information explicit instead
+ * of having to worry about people probing the ACPI areas,
+ * for example.. (Yes, it happens, and if you read the wrong
+ * ACPI register it will put the machine to sleep with no
+ * way of waking it up again. Bummer).
+ *
+ * ALI M7101: Two IO regions pointed to by words at
+ * 0xE0 (64 bytes of ACPI registers)
+ * 0xE2 (32 bytes of SMB registers)
+ */
+static void __init quirk_ali7101_acpi(struct pci_dev *dev)
+{
+ u16 region;
+
+ pci_read_config_word(dev, 0xE0, &region);
+ quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES);
+ pci_read_config_word(dev, 0xE2, &region);
+ quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1);
+}
+
+/*
+ * PIIX4 ACPI: Two IO regions pointed to by longwords at
+ * 0x40 (64 bytes of ACPI registers)
+ * 0x90 (32 bytes of SMB registers)
+ */
+static void __init quirk_piix4_acpi(struct pci_dev *dev)
+{
+ u32 region;
+
+ pci_read_config_dword(dev, 0x40, &region);
+ quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES);
+ pci_read_config_dword(dev, 0x90, &region);
+ quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1);
+}
+
+/*
+ * VIA ACPI: One IO region pointed to by longword at
+ * 0x48 or 0x20 (256 bytes of ACPI registers)
+ */
+static void __init quirk_vt82c586_acpi(struct pci_dev *dev)
+{
+ u8 rev;
+ u32 region;
+
+ pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
+ if (rev & 0x10) {
+ pci_read_config_dword(dev, 0x48, &region);
+ region &= PCI_BASE_ADDRESS_IO_MASK;
+ quirk_io_region(dev, region, 256, PCI_BRIDGE_RESOURCES);
+ }
+}
+
+/*
+ * VIA VT82C686 ACPI: Three IO regions pointed to by (long)words at
+ *	0x48 (256 bytes of ACPI registers)
+ *	0x70 (128 bytes of hardware monitoring registers)
+ * 0x90 (16 bytes of SMB registers)
+ */
+static void __init quirk_vt82c686_acpi(struct pci_dev *dev)
+{
+ u16 hm;
+ u32 smb;
+
+ quirk_vt82c586_acpi(dev);
+
+ pci_read_config_word(dev, 0x70, &hm);
+ hm &= PCI_BASE_ADDRESS_IO_MASK;
+ quirk_io_region(dev, hm, 128, PCI_BRIDGE_RESOURCES + 1);
+
+ pci_read_config_dword(dev, 0x90, &smb);
+ smb &= PCI_BASE_ADDRESS_IO_MASK;
+ quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 2);
+}
+
+
+#ifdef CONFIG_X86_IO_APIC
+extern int nr_ioapics;
+
+/*
+ * VIA 686A/B: If an IO-APIC is active, we need to route all on-chip
+ * devices to the external APIC.
+ *
+ * TODO: When we have device-specific interrupt routers,
+ * this code will go away from quirks.
+ */
+static void __init quirk_via_ioapic(struct pci_dev *dev)
+{
+ u8 tmp;
+
+ if (nr_ioapics < 1)
+ tmp = 0; /* nothing routed to external APIC */
+ else
+ tmp = 0x1f; /* all known bits (4-0) routed to external APIC */
+
+ printk(KERN_INFO "PCI: %sbling Via external APIC routing\n",
+ tmp == 0 ? "Disa" : "Ena");
+
+ /* Offset 0x58: External APIC IRQ output control */
+ pci_write_config_byte (dev, 0x58, tmp);
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+
+
+/*
+ * Via 686A/B: The PCI_INTERRUPT_LINE register for the on-chip
+ * devices, USB0/1, AC97, MC97, and ACPI, has an unusual feature:
+ * when written, it makes an internal connection to the PIC.
+ * For these devices, this register is defined to be 4 bits wide.
+ * Normally this is fine. However for IO-APIC motherboards, or
+ * non-x86 architectures (yes, Via exists on PPC among other places),
+ * we must mask the PCI_INTERRUPT_LINE value with 0xf to get
+ * interrupts delivered properly.
+ *
+ * TODO: When we have device-specific interrupt routers,
+ * quirk_via_irqpic will go away from quirks.
+ */
+
+/*
+ * FIXME: it is questionable whether quirk_via_acpi
+ * is needed. It shows up as an ISA bridge, and does not
+ * support the PCI_INTERRUPT_LINE register at all. Therefore
+ * it seems like setting the pci_dev's 'irq' to the
+ * value of the ACPI SCI interrupt is only done for convenience.
+ * -jgarzik
+ */
+static void __init quirk_via_acpi(struct pci_dev *d)
+{
+ /*
+ * VIA ACPI device: SCI IRQ line in PCI config byte 0x42
+ */
+ u8 irq;
+ pci_read_config_byte(d, 0x42, &irq);
+ irq &= 0xf;
+ if (irq && (irq != 2))
+ d->irq = irq;
+}
+
+static void __init quirk_via_irqpic(struct pci_dev *dev)
+{
+ u8 irq, new_irq = dev->irq & 0xf;
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+
+ if (new_irq != irq) {
+ printk(KERN_INFO "PCI: Via IRQ fixup for %s, from %d to %d\n",
+ dev->slot_name, irq, new_irq);
+
+ udelay(15);
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, new_irq);
+ }
+}
+
+
+/*
+ * PIIX3 USB: We have to disable USB interrupts that are
+ * hardwired to PIRQD# and may be shared with an
+ * external device.
+ *
+ * Legacy Support Register (LEGSUP):
+ * bit13: USB PIRQ Enable (USBPIRQDEN),
+ * bit4: Trap/SMI On IRQ Enable (USBSMIEN).
+ *
+ * We mask out all r/wc bits, too.
+ */
+static void __init quirk_piix3_usb(struct pci_dev *dev)
+{
+ u16 legsup;
+
+ pci_read_config_word(dev, 0xc0, &legsup);
+ legsup &= 0x50ef;
+ pci_write_config_word(dev, 0xc0, legsup);
+}
+
+/*
+ * VIA VT82C598 has its device ID settable and many BIOSes
+ * set it to the ID of VT82C597 for backward compatibility.
+ * We need to switch it off to be able to recognize the real
+ * type of the chip.
+ */
+static void __init quirk_vt82c598_id(struct pci_dev *dev)
+{
+ pci_write_config_byte(dev, 0xfc, 0);
+ pci_read_config_word(dev, PCI_DEVICE_ID, &dev->device);
+}
+
+/*
+ * CardBus controllers have a legacy base address that enables them
+ * to respond as i82365 pcmcia controllers. We don't want them to
+ * do this even if the Linux CardBus driver is not loaded, because
+ * the Linux i82365 driver does not (and should not) handle CardBus.
+ */
+static void __init quirk_cardbus_legacy(struct pci_dev *dev)
+{
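+	/* dev->class is the 24-bit class code (base class, sub-class,
+	   prog-if); the XOR is zero only for an exact CardBus bridge
+	   class code, so anything else bails out here. */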
+ if ((PCI_CLASS_BRIDGE_CARDBUS << 8) ^ dev->class)
+ return;
+ pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0);
+}
+
+/*
+ * The AMD IO-APIC can hang the box when an APIC IRQ is masked.
+ * We check all revs >= B0 (but not pre-production parts!) as the bug
+ * is currently marked NoFix.
+ *
+ * We have multiple reports of hangs with this chipset that went away with
+ * noapic specified. For the moment we assume it's the erratum. We may be
+ * wrong of course. However the advice is demonstrably good even if so.
+ */
+
+static void __init quirk_amd_ioapic(struct pci_dev *dev)
+{
+ u8 rev;
+
+ pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
+ if(rev >= 0x02)
+ {
+ printk(KERN_WARNING "I/O APIC: AMD Errata #22 may be present. In the event of instability try\n");
+ printk(KERN_WARNING " : booting with the \"noapic\" option.\n");
+ }
+}
+
+/*
+ * Following the PCI ordering rules is optional on the AMD762. I'm not
+ * sure what the designers were smoking but let's not inhale...
+ *
+ * To be fair to AMD, it follows the spec by default; it's BIOS people
+ * who turn it off!
+ */
+
+static void __init quirk_amd_ordering(struct pci_dev *dev)
+{
+ u32 pcic;
+ pci_read_config_dword(dev, 0x4C, &pcic);
+ if((pcic&6)!=6)
+ {
+ pcic |= 6;
+ printk(KERN_WARNING "BIOS failed to enable PCI standards compliance, fixing this error.\n");
+ pci_write_config_dword(dev, 0x4C, pcic);
+ pci_read_config_dword(dev, 0x84, &pcic);
+ pcic |= (1<<23); /* Required in this mode */
+ pci_write_config_dword(dev, 0x84, pcic);
+ }
+}
+
+/*
+ * DreamWorks provided workaround for Dunord I-3000 problem
+ *
+ * This card decodes and responds to addresses not apparently
+ * assigned to it. We force a larger allocation to ensure that
+ * nothing gets put too close to it.
+ */
+
+static void __init quirk_dunord(struct pci_dev *dev)
+{
+	struct resource *r = &dev->resource[1];
+	r->start = 0;
+	r->end = 0xffffff;
+}
+
+static void __init quirk_transparent_bridge(struct pci_dev *dev)
+{
+ dev->transparent = 1;
+}
+
+/*
+ * Common misconfiguration of the MediaGX/Geode PCI master that will
+ * reduce PCI bandwidth from 70MB/s to 25MB/s. See the GXM/GXLV/GX1
+ * datasheets found at http://www.national.com/ds/GX for info on what
+ * these bits do. <christer@weinigel.se>
+ */
+
+static void __init quirk_mediagx_master(struct pci_dev *dev)
+{
+ u8 reg;
+ pci_read_config_byte(dev, 0x41, &reg);
+ if (reg & 2) {
+ reg &= ~2;
+ printk(KERN_INFO "PCI: Fixup for MediaGX/Geode Slave Disconnect Boundary (0x41=0x%02x)\n", reg);
+ pci_write_config_byte(dev, 0x41, reg);
+ }
+}
+
+/*
+ * As per PCI spec, ignore base address registers 0-3 of the IDE controllers
+ * running in Compatible mode (bits 0 and 2 in the ProgIf for primary and
+ * secondary channels respectively). If the device reports Compatible mode
+ * but does use BAR0-3 for address decoding, we assume that firmware has
+ * programmed these BARs with standard values (0x1f0,0x3f4 and 0x170,0x374).
+ * Exceptions (if they exist) must be handled in chip/architecture specific
+ * fixups.
+ *
+ * Note for non-x86 people: you may need an arch-specific quirk to handle
+ * moving IDE devices to native mode as well. Some plug-in card devices
+ * power up in compatible mode and assume the BIOS will adjust them.
+ *
+ * Q: should we load the 0x1f0,0x3f4 into the registers or zap them as
+ * we do now? What we don't want is pci_enable_device coming along
+ * and assigning new resources. Both approaches work for that.
+ */
+
+static void __devinit quirk_ide_bases(struct pci_dev *dev)
+{
+ struct resource *res;
+ int first_bar = 2, last_bar = 0;
+
+ if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
+ return;
+
+ res = &dev->resource[0];
+
+ /* primary channel: ProgIf bit 0, BAR0, BAR1 */
+ if (!(dev->class & 1) && (res[0].flags || res[1].flags)) {
+ res[0].start = res[0].end = res[0].flags = 0;
+ res[1].start = res[1].end = res[1].flags = 0;
+ first_bar = 0;
+ last_bar = 1;
+ }
+
+ /* secondary channel: ProgIf bit 2, BAR2, BAR3 */
+ if (!(dev->class & 4) && (res[2].flags || res[3].flags)) {
+ res[2].start = res[2].end = res[2].flags = 0;
+ res[3].start = res[3].end = res[3].flags = 0;
+ last_bar = 3;
+ }
+
+ if (!last_bar)
+ return;
+
+ printk(KERN_INFO "PCI: Ignoring BAR%d-%d of IDE controller %s\n",
+ first_bar, last_bar, dev->slot_name);
+}
+
+/*
+ * The main table of quirks.
+ */
+
+static struct pci_fixup pci_fixups[] __initdata = {
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_DUNORD, PCI_DEVICE_ID_DUNORD_I3000, quirk_dunord },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_passive_release },
+ /*
+	 * It's not totally clear which chipsets are the problematic ones.
+	 * We know the 82C586 and 82C596 variants are affected.
+ */
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, quirk_isa_dma_hangs },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, quirk_isa_dma_hangs },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, quirk_isa_dma_hangs },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_868, quirk_s3_64M },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_968, quirk_s3_64M },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437, quirk_triton },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437VX, quirk_triton },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439, quirk_triton },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quirk_triton },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_0, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_1, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_1, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1647, quirk_alimagik },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1651, quirk_alimagik },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, quirk_nopcipci },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496, quirk_nopcipci },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, quirk_vialatency },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8371_1, quirk_vialatency },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, quirk_vialatency },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576, quirk_vsfx },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_viaetbf },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_vt82c598_id },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_vt82c586_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_vt82c686_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, quirk_piix4_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101, quirk_ali7101_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_2, quirk_piix3_usb },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_2, quirk_piix3_usb },
+ { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, quirk_ide_bases },
+ { PCI_FIXUP_FINAL, PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy },
+
+#ifdef CONFIG_X86_IO_APIC
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic },
+#endif
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_via_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_via_acpi },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_2, quirk_via_irqpic },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, quirk_via_irqpic },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_6, quirk_via_irqpic },
+
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7410, quirk_amd_ioapic },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_IGP, quirk_ati_exploding_mce },
+ /*
+ * i82380FB mobile docking controller: its PCI-to-PCI bridge
+ * is subtractive decoding (transparent), and does indicate this
+ * in the ProgIf. Unfortunately, the ProgIf value is wrong - 0x80
+ * instead of 0x01.
+ */
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82380FB, quirk_transparent_bridge },
+
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_PCI_MASTER, quirk_mediagx_master },
+
+ { 0 }
+};
+
+
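+/*
+ * Quirks are applied in two passes: PCI_FIXUP_HEADER hooks run as each
+ * device's config header is parsed, PCI_FIXUP_FINAL hooks run once the
+ * bus scan has completed.  The architecture's own table (pcibios_fixups)
+ * is walked before the generic table above.
+ */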
+static void pci_do_fixups(struct pci_dev *dev, int pass, struct pci_fixup *f)
+{
+ while (f->pass) {
+ if (f->pass == pass &&
+ (f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
+ (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
+#ifdef DEBUG
+ printk(KERN_INFO "PCI: Calling quirk %p for %s\n", f->hook, dev->slot_name);
+#endif
+ f->hook(dev);
+ }
+ f++;
+ }
+}
+
+void pci_fixup_device(int pass, struct pci_dev *dev)
+{
+ pci_do_fixups(dev, pass, pcibios_fixups);
+ pci_do_fixups(dev, pass, pci_fixups);
+}
diff --git a/xen/drivers/pci/setup-bus.c b/xen/drivers/pci/setup-bus.c
new file mode 100644
index 0000000000..22e7075171
--- /dev/null
+++ b/xen/drivers/pci/setup-bus.c
@@ -0,0 +1,400 @@
+/*
+ * drivers/pci/setup-bus.c
+ *
+ * Extruded from code written by
+ * Dave Rusling (david.rusling@reo.mts.dec.com)
+ * David Mosberger (davidm@cs.arizona.edu)
+ * David Miller (davem@redhat.com)
+ *
+ * Support routines for initializing a PCI subsystem.
+ */
+
+/*
+ * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ * PCI-PCI bridges cleanup, sorted resource allocation.
+ * Feb 2002, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ * Converted to allocation in 3 passes, which gives
+ * tighter packing. Prefetchable range support.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+
+
+#define DEBUG_CONFIG 1
+#if DEBUG_CONFIG
+# define DBGC(args) printk args
+#else
+# define DBGC(args)
+#endif
+
+#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
+
+static int __init
+pbus_assign_resources_sorted(struct pci_bus *bus)
+{
+ struct list_head *ln;
+ struct resource *res;
+ struct resource_list head, *list, *tmp;
+ int idx, found_vga = 0;
+
+ head.next = NULL;
+ for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+ struct pci_dev *dev = pci_dev_b(ln);
+ u16 class = dev->class >> 8;
+ u16 cmd;
+
+ /* First, disable the device to avoid side
+ effects of possibly overlapping I/O and
+ memory ranges.
+ Leave VGA enabled - for obvious reason. :-)
+ Same with all sorts of bridges - they may
+ have VGA behind them. */
+ if (class == PCI_CLASS_DISPLAY_VGA
+ || class == PCI_CLASS_NOT_DEFINED_VGA)
+ found_vga = 1;
+ else if (class >> 8 != PCI_BASE_CLASS_BRIDGE) {
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+ | PCI_COMMAND_MASTER);
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+
+ pdev_sort_resources(dev, &head);
+ }
+
+ for (list = head.next; list;) {
+ res = list->res;
+ idx = res - &list->dev->resource[0];
+ pci_assign_resource(list->dev, idx);
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ return found_vga;
+}
+
+/* Initialize bridges with base/limit values we have collected.
+ PCI-to-PCI Bridge Architecture Specification rev. 1.1 (1998)
+   requires that if there are no I/O ports or memory behind the
+   bridge, the corresponding range must be turned off by writing a
+   base value greater than the limit to the bridge's base/limit registers. */
+static void __init
+pci_setup_bridge(struct pci_bus *bus)
+{
+ struct pbus_set_ranges_data ranges;
+ struct pci_dev *bridge = bus->self;
+ u32 l;
+
+ if (!bridge || (bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+ return;
+
+ ranges.io_start = bus->resource[0]->start;
+ ranges.io_end = bus->resource[0]->end;
+ ranges.mem_start = bus->resource[1]->start;
+ ranges.mem_end = bus->resource[1]->end;
+ ranges.prefetch_start = bus->resource[2]->start;
+ ranges.prefetch_end = bus->resource[2]->end;
+ pcibios_fixup_pbus_ranges(bus, &ranges);
+
+ DBGC((KERN_INFO "PCI: Bus %d, bridge: %s\n",
+ bus->number, bridge->name));
+
+ /* Set up the top and bottom of the PCI I/O segment for this bus. */
+ if (bus->resource[0]->flags & IORESOURCE_IO) {
+ pci_read_config_dword(bridge, PCI_IO_BASE, &l);
+ l &= 0xffff0000;
+ l |= (ranges.io_start >> 8) & 0x00f0;
+ l |= ranges.io_end & 0xf000;
+ /* Set up upper 16 bits of I/O base/limit. */
+ pci_write_config_word(bridge, PCI_IO_BASE_UPPER16,
+ ranges.io_start >> 16);
+ pci_write_config_word(bridge, PCI_IO_LIMIT_UPPER16,
+ ranges.io_end >> 16);
+ DBGC((KERN_INFO " IO window: %04lx-%04lx\n",
+ ranges.io_start, ranges.io_end));
+ }
+ else {
+ /* Clear upper 16 bits of I/O base/limit. */
+ pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0);
+ l = 0x00f0;
+ DBGC((KERN_INFO " IO window: disabled.\n"));
+ }
+ pci_write_config_dword(bridge, PCI_IO_BASE, l);
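+	/* Example encoding: an I/O window 0x1000-0x1fff yields 0x1010 in
+	   the low word of 'l': the upper nibble of each byte carries bits
+	   15:12 of the base and limit addresses respectively. */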
+
+ /* Set up the top and bottom of the PCI Memory segment
+ for this bus. */
+ if (bus->resource[1]->flags & IORESOURCE_MEM) {
+ l = (ranges.mem_start >> 16) & 0xfff0;
+ l |= ranges.mem_end & 0xfff00000;
+ DBGC((KERN_INFO " MEM window: %08lx-%08lx\n",
+ ranges.mem_start, ranges.mem_end));
+ }
+ else {
+ l = 0x0000fff0;
+ DBGC((KERN_INFO " MEM window: disabled.\n"));
+ }
+ pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+
+ /* Clear out the upper 32 bits of PREF base/limit. */
+ pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, 0);
+ pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
+
+ /* Set up PREF base/limit. */
+ if (bus->resource[2]->flags & IORESOURCE_PREFETCH) {
+ l = (ranges.prefetch_start >> 16) & 0xfff0;
+ l |= ranges.prefetch_end & 0xfff00000;
+ DBGC((KERN_INFO " PREFETCH window: %08lx-%08lx\n",
+ ranges.prefetch_start, ranges.prefetch_end));
+ }
+ else {
+ l = 0x0000fff0;
+ DBGC((KERN_INFO " PREFETCH window: disabled.\n"));
+ }
+ pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);
+
+ /* Check if we have VGA behind the bridge.
+ Enable ISA in either case (FIXME!). */
+ l = (bus->resource[0]->flags & IORESOURCE_BUS_HAS_VGA) ? 0x0c : 0x04;
+ pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, l);
+}
+
+/* Check whether the bridge supports optional I/O and
+ prefetchable memory ranges. If not, the respective
+ base/limit registers must be read-only and read as 0. */
+static void __init
+pci_bridge_check_ranges(struct pci_bus *bus)
+{
+ u16 io;
+ u32 pmem;
+ struct pci_dev *bridge = bus->self;
+ struct resource *b_res;
+
+ if (!bridge || (bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+ return;
+
+ b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
+ b_res[1].flags |= IORESOURCE_MEM;
+
+ pci_read_config_word(bridge, PCI_IO_BASE, &io);
+ if (!io) {
+ pci_write_config_word(bridge, PCI_IO_BASE, 0xf0f0);
+ pci_read_config_word(bridge, PCI_IO_BASE, &io);
+ pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
+ }
+ if (io)
+ b_res[0].flags |= IORESOURCE_IO;
+ /* DECchip 21050 pass 2 errata: the bridge may miss an address
+ disconnect boundary by one PCI data phase.
+ Workaround: do not use prefetching on this device. */
+ if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001)
+ return;
+ pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
+ if (!pmem) {
+ pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE,
+ 0xfff0fff0);
+ pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
+ pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
+ }
+ if (pmem)
+ b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
+}
+
+/* Sizing the IO windows of the PCI-PCI bridge is trivial,
+ since these windows have 4K granularity and the IO ranges
+ of non-bridge PCI devices are limited to 256 bytes.
+ We must be careful with the ISA aliasing though. */
+static void __init
+pbus_size_io(struct pci_bus *bus)
+{
+ struct list_head *ln;
+ struct resource *b_res = bus->resource[0];
+ unsigned long size = 0, size1 = 0;
+
+ if (!(b_res->flags & IORESOURCE_IO))
+ return;
+
+ for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+ struct pci_dev *dev = pci_dev_b(ln);
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r = &dev->resource[i];
+ unsigned long r_size;
+
+ if (r->parent || !(r->flags & IORESOURCE_IO))
+ continue;
+ r_size = r->end - r->start + 1;
+
+ if (r_size < 0x400)
+ /* Might be re-aligned for ISA */
+ size += r_size;
+ else
+ size1 += r_size;
+ }
+ /* ??? Reserve some resources for CardBus. */
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)
+ size1 += 4*1024;
+ }
+/* To be fixed in 2.5: we should have some sort of HAVE_ISA
+   flag in the struct pci_bus. */
+#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
+ size = (size & 0xff) + ((size & ~0xffUL) << 2);
+#endif
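+	/* Under ISA aliasing only the first 256 ports of each 1K block
+	   are usable, so everything beyond the low 0xff bytes is
+	   quadrupled: e.g. a 0x300 requirement grows to 0xc00. */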
+ size = ROUND_UP(size + size1, 4096);
+ if (!size) {
+ b_res->flags = 0;
+ return;
+ }
+ /* Alignment of the IO window is always 4K */
+ b_res->start = 4096;
+ b_res->end = b_res->start + size - 1;
+}
+
+/* Calculate the size of the bus and minimal alignment which
+ guarantees that all child resources fit in this size. */
+static void __init
+pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long type)
+{
+ struct list_head *ln;
+ unsigned long min_align, align, size;
+ unsigned long aligns[12]; /* Alignments from 1Mb to 2Gb */
+ int order, max_order;
+ struct resource *b_res = (type & IORESOURCE_PREFETCH) ?
+ bus->resource[2] : bus->resource[1];
+
+ memset(aligns, 0, sizeof(aligns));
+ max_order = 0;
+ size = 0;
+
+ for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+ struct pci_dev *dev = pci_dev_b(ln);
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r = &dev->resource[i];
+ unsigned long r_size;
+
+ if (r->parent || (r->flags & mask) != type)
+ continue;
+ r_size = r->end - r->start + 1;
+ /* For bridges size != alignment */
+ align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start;
+ order = ffz(~align) - 20;
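+			/* For a power-of-two alignment, ffz(~align) is
+			   log2(align): order 0 is 1Mb, order 11 is 2Gb. */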
+ if (order > 11) {
+ printk(KERN_WARNING "PCI: region %s/%d "
+ "too large: %lx-%lx\n",
+ dev->slot_name, i, r->start, r->end);
+ r->flags = 0;
+ continue;
+ }
+ size += r_size;
+ if (order < 0)
+ order = 0;
+ /* Exclude ranges with size > align from
+ calculation of the alignment. */
+			if (r_size == align)
+ aligns[order] += align;
+ if (order > max_order)
+ max_order = order;
+ }
+ /* ??? Reserve some resources for CardBus. */
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS) {
+ size += 1UL << 24; /* 16 Mb */
+ aligns[24 - 20] += 1UL << 24;
+ }
+ }
+
+ align = 0;
+ min_align = 0;
+ for (order = 0; order <= max_order; order++) {
+ unsigned long align1 = 1UL << (order + 20);
+
+ if (!align)
+ min_align = align1;
+ else if (ROUND_UP(align + min_align, min_align) < align1)
+ min_align = align1 >> 1;
+ align += aligns[order];
+ }
+ size = ROUND_UP(size, min_align);
+ if (!size) {
+ b_res->flags = 0;
+ return;
+ }
+ b_res->start = min_align;
+ b_res->end = size + min_align - 1;
+}
+
+void __init
+pbus_size_bridges(struct pci_bus *bus)
+{
+ struct list_head *ln;
+ unsigned long mask, type;
+
+ for (ln=bus->children.next; ln != &bus->children; ln=ln->next)
+ pbus_size_bridges(pci_bus_b(ln));
+
+ /* The root bus? */
+ if (!bus->self)
+ return;
+
+ pci_bridge_check_ranges(bus);
+
+ pbus_size_io(bus);
+
+ mask = type = IORESOURCE_MEM;
+ /* If the bridge supports prefetchable range, size it separately. */
+ if (bus->resource[2] &&
+ bus->resource[2]->flags & IORESOURCE_PREFETCH) {
+ pbus_size_mem(bus, IORESOURCE_PREFETCH, IORESOURCE_PREFETCH);
+ mask |= IORESOURCE_PREFETCH; /* Size non-prefetch only. */
+ }
+ pbus_size_mem(bus, mask, type);
+}
+
+void __init
+pbus_assign_resources(struct pci_bus *bus)
+{
+ struct list_head *ln;
+ int found_vga = pbus_assign_resources_sorted(bus);
+
+ if (found_vga) {
+ struct pci_bus *b;
+
+ /* Propagate presence of the VGA to upstream bridges */
+ for (b = bus; b->parent; b = b->parent) {
+ b->resource[0]->flags |= IORESOURCE_BUS_HAS_VGA;
+ }
+ }
+ for (ln=bus->children.next; ln != &bus->children; ln=ln->next) {
+ struct pci_bus *b = pci_bus_b(ln);
+
+ pbus_assign_resources(b);
+ pci_setup_bridge(b);
+ }
+}
+
+void __init
+pci_assign_unassigned_resources(void)
+{
+ struct list_head *ln;
+ struct pci_dev *dev;
+
+ /* Depth first, calculate sizes and alignments of all
+ subordinate buses. */
+ for(ln=pci_root_buses.next; ln != &pci_root_buses; ln=ln->next)
+ pbus_size_bridges(pci_bus_b(ln));
+ /* Depth last, allocate resources and update the hardware. */
+ for(ln=pci_root_buses.next; ln != &pci_root_buses; ln=ln->next)
+ pbus_assign_resources(pci_bus_b(ln));
+
+ pci_for_each_dev(dev) {
+ pdev_enable_device(dev);
+ }
+}
diff --git a/xen/drivers/pci/setup-irq.c b/xen/drivers/pci/setup-irq.c
new file mode 100644
index 0000000000..4c65b2e98d
--- /dev/null
+++ b/xen/drivers/pci/setup-irq.c
@@ -0,0 +1,71 @@
+/*
+ * drivers/pci/setup-irq.c
+ *
+ * Extruded from code written by
+ * Dave Rusling (david.rusling@reo.mts.dec.com)
+ * David Mosberger (davidm@cs.arizona.edu)
+ * David Miller (davem@redhat.com)
+ *
+ * Support routines for initializing a PCI subsystem.
+ */
+
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+
+
+#define DEBUG_CONFIG 0
+#if DEBUG_CONFIG
+# define DBGC(args) printk args
+#else
+# define DBGC(args)
+#endif
+
+
+static void __init
+pdev_fixup_irq(struct pci_dev *dev,
+ u8 (*swizzle)(struct pci_dev *, u8 *),
+ int (*map_irq)(struct pci_dev *, u8, u8))
+{
+ u8 pin, slot;
+ int irq;
+
+ /* If this device is not on the primary bus, we need to figure out
+ which interrupt pin it will come in on. We know which slot it
+ will come in on 'cos that slot is where the bridge is. Each
+ time the interrupt line passes through a PCI-PCI bridge we must
+ apply the swizzle function. */
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+	/* Cope with illegal values: 0 and anything over 4 fall back to pin 1 (INTA). */
+ if (pin == 0 || pin > 4)
+ pin = 1;
+
+ /* Follow the chain of bridges, swizzling as we go. */
+ slot = (*swizzle)(dev, &pin);
+
+ irq = (*map_irq)(dev, slot, pin);
+ if (irq == -1)
+ irq = 0;
+ dev->irq = irq;
+
+ DBGC((KERN_ERR "PCI fixup irq: (%s) got %d\n", dev->name, dev->irq));
+
+	/* Always tell the device, so the driver knows the real IRQ
+	   to use; the device itself does not use this value. */
+ pcibios_update_irq(dev, irq);
+}
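+
+/*
+ * A typical swizzle implements the standard bridge rule, e.g.
+ * pin = ((pin - 1 + PCI_SLOT(dev->devfn)) % 4) + 1 at each
+ * PCI-PCI bridge on the way up, until the primary bus is reached,
+ * whose slot number then indexes the interrupt routing table.
+ */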
+
+void __init
+pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *),
+ int (*map_irq)(struct pci_dev *, u8, u8))
+{
+ struct pci_dev *dev;
+ pci_for_each_dev(dev) {
+ pdev_fixup_irq(dev, swizzle, map_irq);
+ }
+}
diff --git a/xen/drivers/pci/setup-res.c b/xen/drivers/pci/setup-res.c
new file mode 100644
index 0000000000..1053ad5489
--- /dev/null
+++ b/xen/drivers/pci/setup-res.c
@@ -0,0 +1,241 @@
+/*
+ * drivers/pci/setup-res.c
+ *
+ * Extruded from code written by
+ * Dave Rusling (david.rusling@reo.mts.dec.com)
+ * David Mosberger (davidm@cs.arizona.edu)
+ * David Miller (davem@redhat.com)
+ *
+ * Support routines for initializing a PCI subsystem.
+ */
+
+/* fixed for multiple pci buses, 1999 Andrea Arcangeli <andrea@suse.de> */
+
+/*
+ * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ * Resource sorting
+ */
+
+#include <linux/init.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+
+
+#define DEBUG_CONFIG 0
+#if DEBUG_CONFIG
+# define DBGC(args) printk args
+#else
+# define DBGC(args)
+#endif
+
+
+int __init
+pci_claim_resource(struct pci_dev *dev, int resource)
+{
+ struct resource *res = &dev->resource[resource];
+ struct resource *root = pci_find_parent_resource(dev, res);
+ int err;
+
+ err = -EINVAL;
+ if (root != NULL) {
+ err = request_resource(root, res);
+ if (err) {
+ printk(KERN_ERR "PCI: Address space collision on "
+ "region %d of device %s [%lx:%lx]\n",
+ resource, dev->name, res->start, res->end);
+ }
+ } else {
+ printk(KERN_ERR "PCI: No parent found for region %d "
+ "of device %s\n", resource, dev->name);
+ }
+
+ return err;
+}
+
+/*
+ * Given the PCI bus a device resides on, try to
+ * find an acceptable resource allocation for a
+ * specific device resource..
+ */
+static int pci_assign_bus_resource(const struct pci_bus *bus,
+ struct pci_dev *dev,
+ struct resource *res,
+ unsigned long size,
+ unsigned long min,
+ unsigned int type_mask,
+ int resno)
+{
+ unsigned long align;
+ int i;
+
+ type_mask |= IORESOURCE_IO | IORESOURCE_MEM;
+ for (i = 0 ; i < 4; i++) {
+ struct resource *r = bus->resource[i];
+ if (!r)
+ continue;
+
+ /* type_mask must match */
+ if ((res->flags ^ r->flags) & type_mask)
+ continue;
+
+		/* We cannot allocate a non-prefetchable resource
+		   from a prefetchable area */
+ if ((r->flags & IORESOURCE_PREFETCH) &&
+ !(res->flags & IORESOURCE_PREFETCH))
+ continue;
+
+ /* The bridge resources are special, as their
+ size != alignment. Sizing routines return
+ required alignment in the "start" field. */
+ align = (resno < PCI_BRIDGE_RESOURCES) ? size : res->start;
+
+ /* Ok, try it out.. */
+ if (allocate_resource(r, res, size, min, -1, align,
+ pcibios_align_resource, dev) < 0)
+ continue;
+
+ /* Update PCI config space. */
+ pcibios_update_resource(dev, r, res, resno);
+ return 0;
+ }
+ return -EBUSY;
+}
+
+int
+pci_assign_resource(struct pci_dev *dev, int i)
+{
+ const struct pci_bus *bus = dev->bus;
+ struct resource *res = dev->resource + i;
+ unsigned long size, min;
+
+ size = res->end - res->start + 1;
+ min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
+
+ /* First, try exact prefetching match.. */
+ if (pci_assign_bus_resource(bus, dev, res, size, min, IORESOURCE_PREFETCH, i) < 0) {
+ /*
+ * That failed.
+ *
+ * But a prefetching area can handle a non-prefetching
+ * window (it will just not perform as well).
+ */
+ if (!(res->flags & IORESOURCE_PREFETCH) || pci_assign_bus_resource(bus, dev, res, size, min, 0, i) < 0) {
+ printk(KERN_ERR "PCI: Failed to allocate resource %d(%lx-%lx) for %s\n",
+ i, res->start, res->end, dev->slot_name);
+ return -EBUSY;
+ }
+ }
+
+ DBGC((KERN_ERR " got res[%lx:%lx] for resource %d of %s\n", res->start,
+ res->end, i, dev->name));
+
+ return 0;
+}
+
+/* Sort resources by alignment */
+void __init
+pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
+{
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r;
+ struct resource_list *list, *tmp;
+ unsigned long r_align;
+
+ r = &dev->resource[i];
+ r_align = r->end - r->start;
+
+ if (!(r->flags) || r->parent)
+ continue;
+ if (!r_align) {
+ printk(KERN_WARNING "PCI: Ignore bogus resource %d "
+ "[%lx:%lx] of %s\n",
+ i, r->start, r->end, dev->name);
+ continue;
+ }
+ r_align = (i < PCI_BRIDGE_RESOURCES) ? r_align + 1 : r->start;
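+		/* Insertion sort: the list is kept in order of decreasing
+		   alignment.  For bridge windows the required alignment was
+		   stored in res->start by the bus sizing code. */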
+ for (list = head; ; list = list->next) {
+ unsigned long align = 0;
+ struct resource_list *ln = list->next;
+ int idx;
+
+ if (ln) {
+ idx = ln->res - &ln->dev->resource[0];
+ align = (idx < PCI_BRIDGE_RESOURCES) ?
+ ln->res->end - ln->res->start + 1 :
+ ln->res->start;
+ }
+ if (r_align > align) {
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ panic("pdev_sort_resources(): "
+ "kmalloc() failed!\n");
+ tmp->next = ln;
+ tmp->res = r;
+ tmp->dev = dev;
+ list->next = tmp;
+ break;
+ }
+ }
+ }
+}
+
+void __init
+pdev_enable_device(struct pci_dev *dev)
+{
+ u32 reg;
+ u16 cmd;
+ int i;
+
+ DBGC((KERN_ERR "PCI enable device: (%s)\n", dev->name));
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *res = &dev->resource[i];
+
+ if (res->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ else if (res->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+
+ /* Special case, disable the ROM. Several devices act funny
+	   (i.e. do not respond to memory space writes) when it is left
+ enabled. A good example are QlogicISP adapters. */
+
+ if (dev->rom_base_reg) {
+ pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+ reg &= ~PCI_ROM_ADDRESS_ENABLE;
+ pci_write_config_dword(dev, dev->rom_base_reg, reg);
+ dev->resource[PCI_ROM_RESOURCE].flags &= ~PCI_ROM_ADDRESS_ENABLE;
+ }
+
+ /* All of these (may) have I/O scattered all around and may not
+ use I/O base address registers at all. So we just have to
+ always enable IO to these devices. */
+ if ((dev->class >> 8) == PCI_CLASS_NOT_DEFINED
+ || (dev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA
+ || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE
+ || (dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ cmd |= PCI_COMMAND_IO;
+ }
+
+ /* ??? Always turn on bus mastering. If the device doesn't support
+ it, the bit will go into the bucket. */
+ cmd |= PCI_COMMAND_MASTER;
+
+ /* Set the cache line and default latency (32). */
+ pci_write_config_word(dev, PCI_CACHE_LINE_SIZE,
+ (32 << 8) | (L1_CACHE_BYTES / sizeof(u32)));
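+	/* The word write at PCI_CACHE_LINE_SIZE covers two byte-wide
+	   registers: the low byte is the cache line size in dwords, the
+	   high byte is the latency timer (here 32 PCI clocks). */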
+
+ /* Enable the appropriate bits in the PCI command register. */
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+ DBGC((KERN_ERR " cmd reg 0x%x\n", cmd));
+}
diff --git a/xen/drivers/pci/syscall.c b/xen/drivers/pci/syscall.c
new file mode 100644
index 0000000000..c935efd9a9
--- /dev/null
+++ b/xen/drivers/pci/syscall.c
@@ -0,0 +1,144 @@
+/*
+ * pci_syscall.c
+ *
+ * For architectures where we want to allow direct access
+ * to the PCI config stuff - it would probably be preferable
+ * on PCs too, but there people just do it by hand with the
+ * magic northbridge registers..
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+
+
+asmlinkage long
+sys_pciconfig_read(unsigned long bus, unsigned long dfn,
+ unsigned long off, unsigned long len, void *buf)
+{
+ struct pci_dev *dev;
+ u8 byte;
+ u16 word;
+ u32 dword;
+ long err, cfg_ret;
+
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto error;
+
+ err = -ENODEV;
+ dev = pci_find_slot(bus, dfn);
+ if (!dev)
+ goto error;
+
+ lock_kernel();
+ switch (len) {
+ case 1:
+ cfg_ret = pci_read_config_byte(dev, off, &byte);
+ break;
+ case 2:
+ cfg_ret = pci_read_config_word(dev, off, &word);
+ break;
+ case 4:
+ cfg_ret = pci_read_config_dword(dev, off, &dword);
+ break;
+ default:
+ err = -EINVAL;
+ unlock_kernel();
+ goto error;
+	}
+ unlock_kernel();
+
+ err = -EIO;
+ if (cfg_ret != PCIBIOS_SUCCESSFUL)
+ goto error;
+
+ switch (len) {
+ case 1:
+ err = put_user(byte, (unsigned char *)buf);
+ break;
+ case 2:
+ err = put_user(word, (unsigned short *)buf);
+ break;
+ case 4:
+ err = put_user(dword, (unsigned int *)buf);
+ break;
+	}
+ return err;
+
+error:
+ /* ??? XFree86 doesn't even check the return value. They
+ just look for 0xffffffff in the output, since that's what
+ they get instead of a machine check on x86. */
+ switch (len) {
+ case 1:
+ put_user(-1, (unsigned char *)buf);
+ break;
+ case 2:
+ put_user(-1, (unsigned short *)buf);
+ break;
+ case 4:
+ put_user(-1, (unsigned int *)buf);
+ break;
+	}
+ return err;
+}
+
+asmlinkage long
+sys_pciconfig_write(unsigned long bus, unsigned long dfn,
+ unsigned long off, unsigned long len, void *buf)
+{
+ struct pci_dev *dev;
+ u8 byte;
+ u16 word;
+ u32 dword;
+ int err = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (!pcibios_present())
+ return -ENOSYS;
+
+ dev = pci_find_slot(bus, dfn);
+ if (!dev)
+ return -ENODEV;
+
+ lock_kernel();
+ switch(len) {
+ case 1:
+ err = get_user(byte, (u8 *)buf);
+ if (err)
+ break;
+ err = pci_write_config_byte(dev, off, byte);
+ if (err != PCIBIOS_SUCCESSFUL)
+ err = -EIO;
+ break;
+
+ case 2:
+ err = get_user(word, (u16 *)buf);
+ if (err)
+ break;
+ err = pci_write_config_word(dev, off, word);
+ if (err != PCIBIOS_SUCCESSFUL)
+ err = -EIO;
+ break;
+
+ case 4:
+ err = get_user(dword, (u32 *)buf);
+ if (err)
+ break;
+ err = pci_write_config_dword(dev, off, dword);
+ if (err != PCIBIOS_SUCCESSFUL)
+ err = -EIO;
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+	}
+ unlock_kernel();
+
+ return err;
+}
diff --git a/xen/drivers/scsi/Makefile b/xen/drivers/scsi/Makefile
new file mode 100644
index 0000000000..5b480bdf53
--- /dev/null
+++ b/xen/drivers/scsi/Makefile
@@ -0,0 +1,11 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(MAKE) -C aacraid
+ $(LD) -r -o driver.o $(OBJS) aacraid/aacraid.o
+# $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ $(MAKE) -C aacraid clean
+ rm -f *.o *~ core
diff --git a/xen/drivers/scsi/aacraid/Makefile b/xen/drivers/scsi/aacraid/Makefile
new file mode 100644
index 0000000000..7d802c3bc9
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/Makefile
@@ -0,0 +1,17 @@
+
+include $(BASEDIR)/Rules.mk
+
+CFLAGS += -I$(BASEDIR)/drivers/scsi
+
+
+# -y := linit.o aachba.o commctrl.o comminit.o commsup.o \
+# dpcsup.o rx.o sa.o
+
+default: $(OBJS)
+ $(LD) -r -o aacraid.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
+
+
+
diff --git a/xen/drivers/scsi/aacraid/README b/xen/drivers/scsi/aacraid/README
new file mode 100644
index 0000000000..9f73c6719b
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/README
@@ -0,0 +1,42 @@
+AACRAID Driver for Linux (take two)
+
+Introduction
+-------------------------
+The aacraid driver adds support for Adaptec (http://www.adaptec.com)
+RAID controllers. This is a major rewrite from the original
+Adaptec-supplied driver. It has significantly cleaned up the code and
+reduced the running binary size (the module is less than half the size of
+the original).
+
+Supported Cards/Chipsets
+-------------------------
+ Dell Computer Corporation PERC 2 Quad Channel
+ Dell Computer Corporation PERC 2/Si
+ Dell Computer Corporation PERC 3/Si
+ Dell Computer Corporation PERC 3/Di
+ HP NetRAID-4M
+ ADAPTEC 2120S
+ ADAPTEC 2200S
+ ADAPTEC 5400S
+
+People
+-------------------------
+Alan Cox <alan@redhat.com>
+Christoph Hellwig <hch@infradead.org> (small cleanups/fixes)
+Matt Domsch <matt_domsch@dell.com> (revision ioctl, adapter messages)
+Deanna Bonds                     <deanna_bonds@adaptec.com> (non-DASD support, PAE fibs and 64 bit, added new Adaptec controllers,
+				 added new ioctls, changed SCSI interface to use new error handler,
+ increased the number of fibs and outstanding commands to a container)
+
+Original Driver
+-------------------------
+Adaptec Unix OEM Product Group
+
+Mailing List
+-------------------------
+None currently. Also note this is very different to Brian's original driver
+so don't expect him to support it.
+Adaptec does support this driver.  Contact either tech support or Deanna Bonds.
+
+Original by Brian Boerner February 2001
+Rewritten by Alan Cox, November 2001
diff --git a/xen/drivers/scsi/aacraid/TODO b/xen/drivers/scsi/aacraid/TODO
new file mode 100644
index 0000000000..6f71022413
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/TODO
@@ -0,0 +1,4 @@
+o Testing
+o More testing
+o Feature request: display the firmware/bios/etc revisions in the
+ /proc info
diff --git a/xen/drivers/scsi/aacraid/aachba.c b/xen/drivers/scsi/aacraid/aachba.c
new file mode 100644
index 0000000000..21fc4259b8
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/aachba.c
@@ -0,0 +1,1685 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include <asm/uaccess.h>
+#define MAJOR_NR SCSI_DISK0_MAJOR /* For DEVICE_NR() */
+#include <linux/blk.h>
+#include "scsi.h"
+#include "hosts.h"
+#include "sd.h"
+
+#include "aacraid.h"
+
+/* SCSI Commands */
+/* TODO: dmb - use the ones defined in include/scsi/scsi.h */
+
+#define SS_TEST 0x00 /* Test unit ready */
+#define SS_REZERO 0x01 /* Rezero unit */
+#define SS_REQSEN 0x03 /* Request Sense */
+#define SS_REASGN 0x07 /* Reassign blocks */
+#define SS_READ 0x08 /* Read 6 */
+#define SS_WRITE 0x0A /* Write 6 */
+#define SS_INQUIR 0x12 /* inquiry */
+#define SS_ST_SP 0x1B /* Start/Stop unit */
+#define SS_LOCK 0x1E /* prevent/allow medium removal */
+#define SS_RESERV 0x16 /* Reserve */
+#define SS_RELES 0x17 /* Release */
+#define SS_MODESEN 0x1A /* Mode Sense 6 */
+#define SS_RDCAP 0x25 /* Read Capacity */
+#define SM_READ 0x28 /* Read 10 */
+#define SM_WRITE 0x2A /* Write 10 */
+#define SS_SEEK 0x2B /* Seek */
+
+/* values for inqd_pdt: Peripheral device type in plain English */
+#define INQD_PDT_DA 0x00 /* Direct-access (DISK) device */
+#define INQD_PDT_PROC 0x03 /* Processor device */
+#define INQD_PDT_CHNGR 0x08 /* Changer (jukebox, scsi2) */
+#define INQD_PDT_COMM 0x09 /* Communication device (scsi2) */
+#define INQD_PDT_NOLUN2 0x1f /* Unknown Device (scsi2) */
+#define INQD_PDT_NOLUN 0x7f /* Logical Unit Not Present */
+
+#define INQD_PDT_DMASK 0x1F /* Peripheral Device Type Mask */
+#define INQD_PDT_QMASK 0xE0 /* Peripheral Device Qualifier Mask */
+
+#define TARGET_LUN_TO_CONTAINER(target, lun) (target)
+#define CONTAINER_TO_TARGET(cont) ((cont))
+#define CONTAINER_TO_LUN(cont) (0)
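+/* Containers appear as targets on channel 0 with LUN 0, so the mapping
+ * between container number and SCSI target id is simply the identity. */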
+
+#define MAX_FIB_DATA (sizeof(struct hw_fib) - sizeof(FIB_HEADER))
+
+#define MAX_DRIVER_SG_SEGMENT_COUNT 17
+
+/*
+ * Sense keys
+ */
+#define SENKEY_NO_SENSE 0x00
+#define SENKEY_UNDEFINED 0x01
+#define SENKEY_NOT_READY 0x02
+#define SENKEY_MEDIUM_ERR 0x03
+#define SENKEY_HW_ERR 0x04
+#define SENKEY_ILLEGAL 0x05
+#define SENKEY_ATTENTION 0x06
+#define SENKEY_PROTECTED 0x07
+#define SENKEY_BLANK 0x08
+#define SENKEY_V_UNIQUE 0x09
+#define SENKEY_CPY_ABORT 0x0A
+#define SENKEY_ABORT 0x0B
+#define SENKEY_EQUAL 0x0C
+#define SENKEY_VOL_OVERFLOW 0x0D
+#define SENKEY_MISCOMP 0x0E
+#define SENKEY_RESERVED 0x0F
+
+/*
+ * Sense codes
+ */
+
+#define SENCODE_NO_SENSE 0x00
+#define SENCODE_END_OF_DATA 0x00
+#define SENCODE_BECOMING_READY 0x04
+#define SENCODE_INIT_CMD_REQUIRED 0x04
+#define SENCODE_PARAM_LIST_LENGTH_ERROR 0x1A
+#define SENCODE_INVALID_COMMAND 0x20
+#define SENCODE_LBA_OUT_OF_RANGE 0x21
+#define SENCODE_INVALID_CDB_FIELD 0x24
+#define SENCODE_LUN_NOT_SUPPORTED 0x25
+#define SENCODE_INVALID_PARAM_FIELD 0x26
+#define SENCODE_PARAM_NOT_SUPPORTED 0x26
+#define SENCODE_PARAM_VALUE_INVALID 0x26
+#define SENCODE_RESET_OCCURRED 0x29
+#define SENCODE_LUN_NOT_SELF_CONFIGURED_YET 0x3E
+#define SENCODE_INQUIRY_DATA_CHANGED 0x3F
+#define SENCODE_SAVING_PARAMS_NOT_SUPPORTED 0x39
+#define SENCODE_DIAGNOSTIC_FAILURE 0x40
+#define SENCODE_INTERNAL_TARGET_FAILURE 0x44
+#define SENCODE_INVALID_MESSAGE_ERROR 0x49
+#define SENCODE_LUN_FAILED_SELF_CONFIG 0x4c
+#define SENCODE_OVERLAPPED_COMMAND 0x4E
+
+/*
+ * Additional sense codes
+ */
+
+#define ASENCODE_NO_SENSE 0x00
+#define ASENCODE_END_OF_DATA 0x05
+#define ASENCODE_BECOMING_READY 0x01
+#define ASENCODE_INIT_CMD_REQUIRED 0x02
+#define ASENCODE_PARAM_LIST_LENGTH_ERROR 0x00
+#define ASENCODE_INVALID_COMMAND 0x00
+#define ASENCODE_LBA_OUT_OF_RANGE 0x00
+#define ASENCODE_INVALID_CDB_FIELD 0x00
+#define ASENCODE_LUN_NOT_SUPPORTED 0x00
+#define ASENCODE_INVALID_PARAM_FIELD 0x00
+#define ASENCODE_PARAM_NOT_SUPPORTED 0x01
+#define ASENCODE_PARAM_VALUE_INVALID 0x02
+#define ASENCODE_RESET_OCCURRED 0x00
+#define ASENCODE_LUN_NOT_SELF_CONFIGURED_YET 0x00
+#define ASENCODE_INQUIRY_DATA_CHANGED 0x03
+#define ASENCODE_SAVING_PARAMS_NOT_SUPPORTED 0x00
+#define ASENCODE_DIAGNOSTIC_FAILURE 0x80
+#define ASENCODE_INTERNAL_TARGET_FAILURE 0x00
+#define ASENCODE_INVALID_MESSAGE_ERROR 0x00
+#define ASENCODE_LUN_FAILED_SELF_CONFIG 0x00
+#define ASENCODE_OVERLAPPED_COMMAND 0x00
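+
+/*
+ * The SENCODE and ASENCODE values pair up into standard SCSI ASC/ASCQ
+ * codes; e.g. SENCODE_INVALID_CDB_FIELD / ASENCODE_INVALID_CDB_FIELD is
+ * the familiar 24h/00h "invalid field in CDB".
+ */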
+
+#define BYTE0(x) (unsigned char)(x)
+#define BYTE1(x) (unsigned char)((x) >> 8)
+#define BYTE2(x) (unsigned char)((x) >> 16)
+#define BYTE3(x) (unsigned char)((x) >> 24)
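+/* e.g. BYTE2(0x11223344) == 0x22 */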
+
+/*------------------------------------------------------------------------------
+ * S T R U C T S / T Y P E D E F S
+ *----------------------------------------------------------------------------*/
+/* SCSI inquiry data */
+struct inquiry_data {
+ u8 inqd_pdt; /* Peripheral qualifier | Peripheral Device Type */
+ u8 inqd_dtq; /* RMB | Device Type Qualifier */
+ u8 inqd_ver; /* ISO version | ECMA version | ANSI-approved version */
+ u8 inqd_rdf; /* AENC | TrmIOP | Response data format */
+ u8 inqd_len; /* Additional length (n-4) */
+ u8 inqd_pad1[2]; /* Reserved - must be zero */
+ u8 inqd_pad2; /* RelAdr | WBus32 | WBus16 | Sync | Linked |Reserved| CmdQue | SftRe */
+ u8 inqd_vid[8]; /* Vendor ID */
+ u8 inqd_pid[16]; /* Product ID */
+ u8 inqd_prl[4]; /* Product Revision Level */
+};
+
+struct sense_data {
+ u8 error_code; /* 70h (current errors), 71h(deferred errors) */
+ u8 valid:1; /* A valid bit of one indicates that the information */
+ /* field contains valid information as defined in the
+ * SCSI-2 Standard.
+ */
+ u8 segment_number; /* Only used for COPY, COMPARE, or COPY AND VERIFY Commands */
+ u8 sense_key:4; /* Sense Key */
+ u8 reserved:1;
+ u8 ILI:1; /* Incorrect Length Indicator */
+ u8 EOM:1; /* End Of Medium - reserved for random access devices */
+ u8 filemark:1; /* Filemark - reserved for random access devices */
+
+ u8 information[4]; /* for direct-access devices, contains the unsigned
+ * logical block address or residue associated with
+ * the sense key
+ */
+ u8 add_sense_len; /* number of additional sense bytes to follow this field */
+ u8 cmnd_info[4]; /* not used */
+ u8 ASC; /* Additional Sense Code */
+ u8 ASCQ; /* Additional Sense Code Qualifier */
+ u8 FRUC; /* Field Replaceable Unit Code - not used */
+ u8 bit_ptr:3; /* indicates which byte of the CDB or parameter data
+ * was in error
+ */
+ u8 BPV:1; /* bit pointer valid (BPV): 1- indicates that
+ * the bit_ptr field has valid value
+ */
+ u8 reserved2:2;
+ u8 CD:1; /* command data bit: 1- illegal parameter in CDB.
+ * 0- illegal parameter in data.
+ */
+ u8 SKSV:1;
+ u8 field_ptr[2]; /* byte of the CDB or parameter data in error */
+};
+
+/*
+ * M O D U L E G L O B A L S
+ */
+
+static struct fsa_scsi_hba *fsa_dev[MAXIMUM_NUM_ADAPTERS]; /* SCSI Device
+ Instance Ptrs */
+static struct sense_data sense_data[MAXIMUM_NUM_CONTAINERS];
+static void get_sd_devname(int disknum, char *buffer);
+static unsigned long aac_build_sg(Scsi_Cmnd* scsicmd, struct sgmap* sgmap);
+static unsigned long aac_build_sg64(Scsi_Cmnd* scsicmd, struct sgmap64* psg);
+static int aac_send_srb_fib(Scsi_Cmnd* scsicmd);
+#ifdef AAC_DETAILED_STATUS_INFO
+static char *aac_get_status_string(u32 status);
+#endif
+
+/**
+ * aac_get_containers - list containers
+ * @dev: adapter to probe
+ *
+ * Make a list of all containers on this controller
+ */
+int aac_get_containers(struct aac_dev *dev)
+{
+ struct fsa_scsi_hba *fsa_dev_ptr;
+ u32 index, status = 0;
+ struct aac_query_mount *dinfo;
+ struct aac_mount *dresp;
+ struct fib * fibptr;
+ unsigned instance;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+ instance = dev->scsi_host_ptr->unique_id;
+
+ if (!(fibptr = fib_alloc(dev)))
+ return -ENOMEM;
+
+ for (index = 0; index < MAXIMUM_NUM_CONTAINERS; index++) {
+ fib_init(fibptr);
+ dinfo = (struct aac_query_mount *) fib_data(fibptr);
+
+ dinfo->command = cpu_to_le32(VM_NameServe);
+ dinfo->count = cpu_to_le32(index);
+ dinfo->type = cpu_to_le32(FT_FILESYS);
+
+ printk("aac_get_container: getting info for container %d\n", index);
+ status = fib_send(ContainerCommand,
+ fibptr,
+ sizeof (struct aac_query_mount),
+ FsaNormal,
+ 1, 1,
+ NULL, NULL);
+ if (status < 0 ) {
+ printk(KERN_WARNING "ProbeContainers: SendFIB failed.\n");
+ break;
+ }
+ dresp = (struct aac_mount *)fib_data(fibptr);
+
+ if ((le32_to_cpu(dresp->status) == ST_OK) &&
+ (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
+ fsa_dev_ptr->valid[index] = 1;
+ fsa_dev_ptr->type[index] = le32_to_cpu(dresp->mnt[0].vol);
+ fsa_dev_ptr->size[index] = le32_to_cpu(dresp->mnt[0].capacity);
+ if (le32_to_cpu(dresp->mnt[0].state) & FSCS_READONLY)
+ fsa_dev_ptr->ro[index] = 1;
+ }
+ fib_complete(fibptr);
+ /*
+ * If there are no more containers, then stop asking.
+ */
+ if ((index + 1) >= le32_to_cpu(dresp->count))
+ break;
+ }
+ fib_free(fibptr);
+ fsa_dev[instance] = fsa_dev_ptr;
+ return status;
+}
+
+/**
+ * probe_container - query a logical volume
+ * @dev: device to query
+ * @cid: container identifier
+ *
+ * Queries the controller about the given volume. The volume information
+ * is updated in the struct fsa_scsi_hba rather than returned.
+ */
+
+static int probe_container(struct aac_dev *dev, int cid)
+{
+ struct fsa_scsi_hba *fsa_dev_ptr;
+ int status;
+ struct aac_query_mount *dinfo;
+ struct aac_mount *dresp;
+ struct fib * fibptr;
+ unsigned instance;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+ instance = dev->scsi_host_ptr->unique_id;
+
+ if (!(fibptr = fib_alloc(dev)))
+ return -ENOMEM;
+
+ fib_init(fibptr);
+
+ dinfo = (struct aac_query_mount *)fib_data(fibptr);
+
+ dinfo->command = cpu_to_le32(VM_NameServe);
+ dinfo->count = cpu_to_le32(cid);
+ dinfo->type = cpu_to_le32(FT_FILESYS);
+
+ status = fib_send(ContainerCommand,
+ fibptr,
+ sizeof(struct aac_query_mount),
+ FsaNormal,
+ 1, 1,
+ NULL, NULL);
+ if (status < 0) {
+ printk(KERN_WARNING "aacraid: probe_containers query failed.\n");
+ goto error;
+ }
+
+ dresp = (struct aac_mount *) fib_data(fibptr);
+
+ if ((le32_to_cpu(dresp->status) == ST_OK) &&
+ (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
+ fsa_dev_ptr->valid[cid] = 1;
+ fsa_dev_ptr->type[cid] = le32_to_cpu(dresp->mnt[0].vol);
+ fsa_dev_ptr->size[cid] = le32_to_cpu(dresp->mnt[0].capacity);
+ if (le32_to_cpu(dresp->mnt[0].state) & FSCS_READONLY)
+ fsa_dev_ptr->ro[cid] = 1;
+ }
+
+ error:
+ fib_complete(fibptr);
+ fib_free(fibptr);
+
+ return status;
+}
+
+/* Local Structure to set SCSI inquiry data strings */
+struct scsi_inq {
+ char vid[8]; /* Vendor ID */
+ char pid[16]; /* Product ID */
+ char prl[4]; /* Product Revision Level */
+};
+
+/**
+ * inqstrcpy - string copy without terminator
+ * @a: string to copy from
+ * @b: buffer to copy to
+ *
+ * Copy a string from one location to another
+ * without copying the trailing \0
+ */
+
+static void inqstrcpy(char *a, char *b)
+{
+
+ while(*a != (char)0)
+ *b++ = *a++;
+}
+
+static char *container_types[] = {
+ "None",
+ "Volume",
+ "Mirror",
+ "Stripe",
+ "RAID5",
+ "SSRW",
+ "SSRO",
+ "Morph",
+ "Legacy",
+ "RAID4",
+ "RAID10",
+ "RAID00",
+ "V-MIRRORS",
+ "PSEUDO R4",
+ "RAID50",
+ "Unknown"
+};
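+
+/* Indexed by the CT_* container type codes in aacraid.h (CT_NONE == 0,
+ * CT_VOLUME == 1, ...), with "Unknown" as the final catch-all entry. */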
+
+
+
+/* Function: setinqstr
+ *
+ * Arguments: devtype (card type), data (inquiry buffer),
+ * tindex (container type index)
+ *
+ * Purpose: Sets SCSI inquiry data strings for vendor, product
+ * and revision level. Allows strings to be set in platform dependent
+ * files instead of in OS dependent driver source.
+ */
+
+static void setinqstr(int devtype, void *data, int tindex)
+{
+ struct scsi_inq *str;
+ char *findit;
+ struct aac_driver_ident *mp;
+
+ mp = aac_get_driver_ident(devtype);
+
+ str = (struct scsi_inq *)(data); /* cast data to scsi inq block */
+
+ inqstrcpy (mp->vname, str->vid);
+ inqstrcpy (mp->model, str->pid); /* last six chars reserved for vol type */
+
+ findit = str->pid;
+
+ for ( ; *findit != ' '; findit++); /* walk till we find a space then incr by 1 */
+ findit++;
+
+ if (tindex < (sizeof(container_types)/sizeof(char *))){
+ inqstrcpy (container_types[tindex], findit);
+ }
+ inqstrcpy ("V1.0", str->prl);
+}
+
+void set_sense(u8 *sense_buf, u8 sense_key, u8 sense_code,
+ u8 a_sense_code, u8 incorrect_length,
+ u8 bit_pointer, u16 field_pointer,
+ u32 residue)
+{
+ sense_buf[0] = 0xF0; /* Sense data valid, err code 70h (current error) */
+ sense_buf[1] = 0; /* Segment number, always zero */
+
+ if (incorrect_length) {
+ sense_buf[2] = sense_key | 0x20; /* Set ILI bit | sense key */
+ sense_buf[3] = BYTE3(residue);
+ sense_buf[4] = BYTE2(residue);
+ sense_buf[5] = BYTE1(residue);
+ sense_buf[6] = BYTE0(residue);
+ } else
+ sense_buf[2] = sense_key; /* Sense key */
+
+ if (sense_key == SENKEY_ILLEGAL)
+ sense_buf[7] = 10; /* Additional sense length */
+ else
+ sense_buf[7] = 6; /* Additional sense length */
+
+ sense_buf[12] = sense_code; /* Additional sense code */
+ sense_buf[13] = a_sense_code; /* Additional sense code qualifier */
+ if (sense_key == SENKEY_ILLEGAL) {
+ sense_buf[15] = 0;
+
+ if (sense_code == SENCODE_INVALID_PARAM_FIELD)
+ sense_buf[15] = 0x80; /* Std sense key specific field */
+ /* Illegal parameter is in the parameter block */
+
+ if (sense_code == SENCODE_INVALID_CDB_FIELD)
+ sense_buf[15] = 0xc0; /* Std sense key specific field */
+ /* Illegal parameter is in the CDB block */
+ sense_buf[15] |= bit_pointer;
+ sense_buf[16] = field_pointer >> 8; /* MSB */
+ sense_buf[17] = field_pointer; /* LSB */
+ }
+}
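+
+/*
+ * Illustrative call (hypothetical values): flagging bit 5 of CDB byte 2
+ * as invalid,
+ *
+ *     set_sense(buf, SENKEY_ILLEGAL, SENCODE_INVALID_CDB_FIELD,
+ *               ASENCODE_INVALID_CDB_FIELD, 0, 5, 2, 0);
+ *
+ * yields buf[2] == 0x05, buf[12] == 0x24, buf[15] == 0xc5 and a field
+ * pointer of 0x0002 in buf[16..17].
+ */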
+
+static void aac_io_done(Scsi_Cmnd * scsicmd)
+{
+ unsigned long cpu_flags;
+ spin_lock_irqsave(&io_request_lock, cpu_flags);
+ scsicmd->scsi_done(scsicmd);
+ spin_unlock_irqrestore(&io_request_lock, cpu_flags);
+}
+
+static void __aac_io_done(Scsi_Cmnd * scsicmd)
+{
+ scsicmd->scsi_done(scsicmd);
+}
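+
+/*
+ * Locking note: aac_io_done() takes io_request_lock around the midlayer
+ * completion, while __aac_io_done() assumes the caller already holds it
+ * (as aac_scsi_cmd() does when called from the SCSI midlayer).
+ */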
+
+int aac_get_adapter_info(struct aac_dev* dev)
+{
+ struct fib* fibptr;
+ struct aac_adapter_info* info;
+ int rcode;
+ u32 tmp;
+
+ if (!(fibptr = fib_alloc(dev)))
+ return -ENOMEM;
+
+ fib_init(fibptr);
+ info = (struct aac_adapter_info*) fib_data(fibptr);
+
+ memset(info,0,sizeof(struct aac_adapter_info));
+
+ rcode = fib_send(RequestAdapterInfo,
+ fibptr,
+ sizeof(struct aac_adapter_info),
+ FsaNormal,
+ 1, 1,
+ NULL,
+ NULL);
+
+ memcpy(&dev->adapter_info, info, sizeof(struct aac_adapter_info));
+
+ tmp = dev->adapter_info.kernelrev;
+ printk(KERN_INFO "%s%d: kernel %d.%d.%d build %d\n",
+ dev->name, dev->id,
+ tmp>>24,(tmp>>16)&0xff,(tmp>>8)&0xff,
+ dev->adapter_info.kernelbuild);
+ tmp = dev->adapter_info.monitorrev;
+ printk(KERN_INFO "%s%d: monitor %d.%d.%d build %d\n",
+ dev->name, dev->id,
+ tmp>>24,(tmp>>16)&0xff,(tmp>>8)&0xff,
+ dev->adapter_info.monitorbuild);
+ tmp = dev->adapter_info.biosrev;
+ printk(KERN_INFO "%s%d: bios %d.%d.%d build %d\n",
+ dev->name, dev->id,
+ tmp>>24,(tmp>>16)&0xff,(tmp>>8)&0xff,
+ dev->adapter_info.biosbuild);
+ printk(KERN_INFO "%s%d: serial %x%x\n",
+ dev->name, dev->id,
+ dev->adapter_info.serial[0],
+ dev->adapter_info.serial[1]);
+ dev->pae_support = 0;
+ dev->nondasd_support = 0;
+ if( BITS_PER_LONG >= 64 &&
+ (dev->adapter_info.options & AAC_OPT_SGMAP_HOST64)){
+ printk(KERN_INFO "%s%d: 64 Bit PAE enabled\n",
+ dev->name, dev->id);
+ dev->pae_support = 1;
+ }
+ /* TODO - dmb temporary until fw can set this bit */
+ dev->pae_support = (BITS_PER_LONG >= 64);
+ if(dev->pae_support != 0) {
+ printk(KERN_INFO "%s%d: 64 Bit PAE enabled\n",
+ dev->name, dev->id);
+ }
+
+ if(dev->adapter_info.options & AAC_OPT_NONDASD){
+ dev->nondasd_support = 1;
+ }
+ return rcode;
+}
+
+
+static void read_callback(void *context, struct fib * fibptr)
+{
+ struct aac_dev *dev;
+ struct aac_read_reply *readreply;
+ Scsi_Cmnd *scsicmd;
+ u32 lba;
+ u32 cid;
+
+ scsicmd = (Scsi_Cmnd *) context;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ cid =TARGET_LUN_TO_CONTAINER(scsicmd->target, scsicmd->lun);
+
+ lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+ dprintk((KERN_DEBUG "read_callback[cpu %d]: lba = %d, t = %ld.\n", smp_processor_id(), lba, jiffies));
+
+ if (fibptr == NULL)
+ BUG();
+
+ if(scsicmd->use_sg)
+ pci_unmap_sg(dev->pdev,
+ (struct scatterlist *)scsicmd->buffer,
+ scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ else if(scsicmd->request_bufflen)
+ pci_unmap_single(dev->pdev, (dma_addr_t)(unsigned long)scsicmd->SCp.ptr,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ readreply = (struct aac_read_reply *)fib_data(fibptr);
+ if (le32_to_cpu(readreply->status) == ST_OK)
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ else {
+ printk(KERN_WARNING "read_callback: read failed, status = %d\n", readreply->status);
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | CHECK_CONDITION;
+ set_sense((u8 *) &sense_data[cid],
+ SENKEY_HW_ERR,
+ SENCODE_INTERNAL_TARGET_FAILURE,
+ ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0,
+ 0, 0);
+ }
+ fib_complete(fibptr);
+ fib_free(fibptr);
+
+ aac_io_done(scsicmd);
+}
+
+static void write_callback(void *context, struct fib * fibptr)
+{
+ struct aac_dev *dev;
+ struct aac_write_reply *writereply;
+ Scsi_Cmnd *scsicmd;
+ u32 lba;
+ u32 cid;
+
+ scsicmd = (Scsi_Cmnd *) context;
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ cid = TARGET_LUN_TO_CONTAINER(scsicmd->target, scsicmd->lun);
+
+ lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+ dprintk((KERN_DEBUG "write_callback[cpu %d]: lba = %d, t = %ld.\n", smp_processor_id(), lba, jiffies));
+ if (fibptr == NULL)
+ BUG();
+
+ if(scsicmd->use_sg)
+ pci_unmap_sg(dev->pdev,
+ (struct scatterlist *)scsicmd->buffer,
+ scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ else if(scsicmd->request_bufflen)
+ pci_unmap_single(dev->pdev, (dma_addr_t)(unsigned long)scsicmd->SCp.ptr,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+
+ writereply = (struct aac_write_reply *) fib_data(fibptr);
+ if (le32_to_cpu(writereply->status) == ST_OK)
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ else {
+ printk(KERN_WARNING "write_callback: write failed, status = %d\n", writereply->status);
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | CHECK_CONDITION;
+ set_sense((u8 *) &sense_data[cid],
+ SENKEY_HW_ERR,
+ SENCODE_INTERNAL_TARGET_FAILURE,
+ ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0,
+ 0, 0);
+ }
+
+ fib_complete(fibptr);
+ fib_free(fibptr);
+ aac_io_done(scsicmd);
+}
+
+int aac_read(Scsi_Cmnd * scsicmd, int cid)
+{
+ u32 lba;
+ u32 count;
+ int status;
+
+ u16 fibsize;
+ struct aac_dev *dev;
+ struct fib * cmd_fibcontext;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ /*
+ * Get block address and transfer length
+ */
+ if (scsicmd->cmnd[0] == SS_READ) /* 6 byte command */
+ {
+ dprintk((KERN_DEBUG "aachba: received a read(6) command on target %d.\n", cid));
+
+ lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+ count = scsicmd->cmnd[4];
+
+ if (count == 0)
+ count = 256;
+ } else {
+ dprintk((KERN_DEBUG "aachba: received a read(10) command on target %d.\n", cid));
+
+ lba = (scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8];
+ }
+ dprintk((KERN_DEBUG "aac_read[cpu %d]: lba = %u, t = %ld.\n", smp_processor_id(), lba, jiffies));
+ /*
+ * Allocate and initialize a Fib
+ */
+ if (!(cmd_fibcontext = fib_alloc(dev))) {
+ scsicmd->result = DID_ERROR << 16;
+ aac_io_done(scsicmd);
+ return (-1);
+ }
+
+ fib_init(cmd_fibcontext);
+
+ if(dev->pae_support == 1){
+ struct aac_read64 *readcmd;
+ readcmd = (struct aac_read64 *) fib_data(cmd_fibcontext);
+ readcmd->command = cpu_to_le32(VM_CtHostRead64);
+ readcmd->cid = cpu_to_le16(cid);
+ readcmd->sector_count = cpu_to_le16(count);
+ readcmd->block = cpu_to_le32(lba);
+ readcmd->pad = cpu_to_le16(0);
+ readcmd->flags = cpu_to_le16(0);
+
+ aac_build_sg64(scsicmd, &readcmd->sg);
+ if(readcmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+ BUG();
+ fibsize = sizeof(struct aac_read64) +
+ ((readcmd->sg.count - 1) * sizeof (struct sgentry64));
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ContainerCommand64,
+ cmd_fibcontext,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+ (fib_callback) read_callback,
+ (void *) scsicmd);
+ } else {
+ struct aac_read *readcmd;
+ readcmd = (struct aac_read *) fib_data(cmd_fibcontext);
+ readcmd->command = cpu_to_le32(VM_CtBlockRead);
+ readcmd->cid = cpu_to_le32(cid);
+ readcmd->block = cpu_to_le32(lba);
+ readcmd->count = cpu_to_le32(count * 512);
+
+ if (count * 512 > (64 * 1024))
+ BUG();
+
+ aac_build_sg(scsicmd, &readcmd->sg);
+ if(readcmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+ BUG();
+ fibsize = sizeof(struct aac_read) +
+ ((readcmd->sg.count - 1) * sizeof (struct sgentry));
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ContainerCommand,
+ cmd_fibcontext,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+ (fib_callback) read_callback,
+ (void *) scsicmd);
+ }
+
+
+ /*
+ * Check that the command was queued to the controller
+ */
+ if (status == -EINPROGRESS)
+ return 0;
+
+ printk(KERN_WARNING "aac_read: fib_send failed with status: %d.\n",
+ status);
+ /*
+ * For some reason the Fib didn't queue; return QUEUE_FULL
+ */
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | QUEUE_FULL;
+ aac_io_done(scsicmd);
+ fib_complete(cmd_fibcontext);
+ fib_free(cmd_fibcontext);
+ return -1;
+}
+
+static int aac_write(Scsi_Cmnd * scsicmd, int cid)
+{
+ u32 lba;
+ u32 count;
+ int status;
+ u16 fibsize;
+ struct aac_dev *dev;
+ struct fib * cmd_fibcontext;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ /*
+ * Get block address and transfer length
+ */
+ if (scsicmd->cmnd[0] == SS_WRITE) /* 6 byte command */
+ {
+ lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+ count = scsicmd->cmnd[4];
+ if (count == 0)
+ count = 256;
+ } else {
+ dprintk((KERN_DEBUG "aachba: received a write(10) command on target %d.\n", cid));
+ lba = (scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8];
+ }
+ dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %u, t = %ld.\n",
+ smp_processor_id(), lba, jiffies));
+ /*
+ * Allocate and initialize a Fib then setup a BlockWrite command
+ */
+ if (!(cmd_fibcontext = fib_alloc(dev))) {
+ scsicmd->result = DID_ERROR << 16;
+ aac_io_done(scsicmd);
+ return -1;
+ }
+ fib_init(cmd_fibcontext);
+
+ if(dev->pae_support == 1)
+ {
+ struct aac_write64 *writecmd;
+ writecmd = (struct aac_write64 *) fib_data(cmd_fibcontext);
+ writecmd->command = cpu_to_le32(VM_CtHostWrite64);
+ writecmd->cid = cpu_to_le16(cid);
+ writecmd->sector_count = cpu_to_le16(count);
+ writecmd->block = cpu_to_le32(lba);
+ writecmd->pad = cpu_to_le16(0);
+ writecmd->flags = cpu_to_le16(0);
+
+ aac_build_sg64(scsicmd, &writecmd->sg);
+ if(writecmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+ BUG();
+ fibsize = sizeof(struct aac_write64) +
+ ((writecmd->sg.count - 1) * sizeof (struct sgentry64));
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ContainerCommand64,
+ cmd_fibcontext,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+ (fib_callback) write_callback,
+ (void *) scsicmd);
+ }
+ else
+ {
+ struct aac_write *writecmd;
+ writecmd = (struct aac_write *) fib_data(cmd_fibcontext);
+ writecmd->command = cpu_to_le32(VM_CtBlockWrite);
+ writecmd->cid = cpu_to_le32(cid);
+ writecmd->block = cpu_to_le32(lba);
+ writecmd->count = cpu_to_le32(count * 512);
+ writecmd->sg.count = cpu_to_le32(1);
+ /* ->stable is not used - it did mean which type of write */
+
+ if (count * 512 > (64 * 1024))
+ BUG();
+ aac_build_sg(scsicmd, &writecmd->sg);
+ if(writecmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+ BUG();
+ fibsize = sizeof(struct aac_write) +
+ ((writecmd->sg.count - 1) * sizeof (struct sgentry));
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ContainerCommand,
+ cmd_fibcontext,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+ (fib_callback) write_callback,
+ (void *) scsicmd);
+ }
+
+ /*
+ * Check that the command was queued to the controller
+ */
+ if (status == -EINPROGRESS)
+ return 0;
+
+ printk(KERN_WARNING "aac_write: fib_send failed with status: %d\n", status);
+ /*
+ * For some reason the Fib didn't queue; return QUEUE_FULL
+ */
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | QUEUE_FULL;
+ aac_io_done(scsicmd);
+
+ fib_complete(cmd_fibcontext);
+ fib_free(cmd_fibcontext);
+ return -1;
+}
+
+
+/**
+ * aac_scsi_cmd() - Process SCSI command
+ * @scsicmd: SCSI command block
+ *
+ * Emulate a SCSI command and queue the required request for the
+ * aacraid firmware.
+ */
+
+int aac_scsi_cmd(Scsi_Cmnd * scsicmd)
+{
+ u32 cid = 0;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+ int cardtype;
+ int ret;
+ struct aac_dev *dev = (struct aac_dev *)scsicmd->host->hostdata;
+
+ cardtype = dev->cardtype;
+
+ fsa_dev_ptr = fsa_dev[scsicmd->host->unique_id];
+
+ /*
+ * If the bus, target or lun is out of range, return fail
+ * Test does not apply to ID 16, the pseudo id for the controller
+ * itself.
+ */
+ if (scsicmd->target != scsicmd->host->this_id) {
+ if ((scsicmd->channel == 0) ){
+ if( (scsicmd->target >= AAC_MAX_TARGET) || (scsicmd->lun != 0)){
+ scsicmd->result = DID_NO_CONNECT << 16;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+ cid = TARGET_LUN_TO_CONTAINER(scsicmd->target, scsicmd->lun);
+
+ /*
+ * If the target container doesn't exist, it may have
+ * been newly created
+ */
+ if (fsa_dev_ptr->valid[cid] == 0) {
+ switch (scsicmd->cmnd[0]) {
+ case SS_INQUIR:
+ case SS_RDCAP:
+ case SS_TEST:
+ spin_unlock_irq(&io_request_lock);
+ probe_container(dev, cid);
+ spin_lock_irq(&io_request_lock);
+ if (fsa_dev_ptr->valid[cid] == 0) {
+ scsicmd->result = DID_NO_CONNECT << 16;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+ default:
+ break;
+ }
+ }
+ /*
+ * If the target container still doesn't exist,
+ * return failure
+ */
+ if (fsa_dev_ptr->valid[cid] == 0) {
+ scsicmd->result = DID_BAD_TARGET << 16;
+ __aac_io_done(scsicmd);
+ return -1;
+ }
+ } else { /* check for physical non-dasd devices */
+ if(dev->nondasd_support == 1){
+ return aac_send_srb_fib(scsicmd);
+ } else {
+ scsicmd->result = DID_NO_CONNECT << 16;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+ }
+ }
+ /*
+ * else Command for the controller itself
+ */
+ else if ((scsicmd->cmnd[0] != SS_INQUIR) &&
+ (scsicmd->cmnd[0] != SS_TEST))
+ {
+ /* only INQUIRY & TUR cmnd supported for controller */
+ dprintk((KERN_WARNING "Only INQUIRY & TUR command supported for "
+ "controller, rcvd = 0x%x.\n", scsicmd->cmnd[0]));
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 |
+ CHECK_CONDITION;
+ set_sense((u8 *) &sense_data[cid],
+ SENKEY_ILLEGAL,
+ SENCODE_INVALID_COMMAND,
+ ASENCODE_INVALID_COMMAND, 0, 0, 0, 0);
+ __aac_io_done(scsicmd);
+ return -1;
+ }
+
+
+ /* Handle commands here that don't require going out to the adapter */
+ switch (scsicmd->cmnd[0]) {
+ case SS_INQUIR:
+ {
+ struct inquiry_data *inq_data_ptr;
+
+ dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", scsicmd->target));
+ inq_data_ptr = (struct inquiry_data *)scsicmd->request_buffer;
+ memset(inq_data_ptr, 0, sizeof (struct inquiry_data));
+
+ inq_data_ptr->inqd_ver = 2; /* claim compliance to SCSI-2 */
+ inq_data_ptr->inqd_dtq = 0x80; /* set RMB bit to one indicating that the medium is removable */
+ inq_data_ptr->inqd_rdf = 2; /* A response data format value of two indicates that the data shall be in the format specified in SCSI-2 */
+ inq_data_ptr->inqd_len = 31;
+ /*Format for "pad2" is RelAdr | WBus32 | WBus16 | Sync | Linked |Reserved| CmdQue | SftRe */
+ inq_data_ptr->inqd_pad2= 0x32 ; /*WBus16|Sync|CmdQue */
+ /*
+ * Set the Vendor, Product, and Revision Level
+ * see: <vendor>.c i.e. aac.c
+ */
+ setinqstr(cardtype, (void *) (inq_data_ptr->inqd_vid), fsa_dev_ptr->type[cid]);
+ if (scsicmd->target == scsicmd->host->this_id)
+ inq_data_ptr->inqd_pdt = INQD_PDT_PROC; /* Processor device */
+ else
+ inq_data_ptr->inqd_pdt = INQD_PDT_DA; /* Direct/random access device */
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+ case SS_RDCAP:
+ {
+ int capacity;
+ char *cp;
+
+ dprintk((KERN_DEBUG "READ CAPACITY command.\n"));
+ capacity = fsa_dev_ptr->size[cid] - 1;
+ cp = scsicmd->request_buffer;
+ cp[0] = (capacity >> 24) & 0xff;
+ cp[1] = (capacity >> 16) & 0xff;
+ cp[2] = (capacity >> 8) & 0xff;
+ cp[3] = (capacity >> 0) & 0xff;
+ cp[4] = 0;
+ cp[5] = 0;
+ cp[6] = 2;
+ cp[7] = 0;
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+
+ return 0;
+ }
+
+ case SS_MODESEN:
+ {
+ char *mode_buf;
+
+ dprintk((KERN_DEBUG "MODE SENSE command.\n"));
+ mode_buf = scsicmd->request_buffer;
+ mode_buf[0] = 0; /* Mode data length (MSB) */
+ mode_buf[1] = 6; /* Mode data length (LSB) */
+ mode_buf[2] = 0; /* Medium type - default */
+ mode_buf[3] = 0; /* Device-specific param,
+ bit 8: 0/1 = write enabled/protected */
+ mode_buf[4] = 0; /* reserved */
+ mode_buf[5] = 0; /* reserved */
+ mode_buf[6] = 0; /* Block descriptor length (MSB) */
+ mode_buf[7] = 0; /* Block descriptor length (LSB) */
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+
+ return 0;
+ }
+ case SS_REQSEN:
+ dprintk((KERN_DEBUG "REQUEST SENSE command.\n"));
+ memcpy(scsicmd->sense_buffer, &sense_data[cid],
+ sizeof (struct sense_data));
+ memset(&sense_data[cid], 0, sizeof (struct sense_data));
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+ return (0);
+
+ case SS_LOCK:
+ dprintk((KERN_DEBUG "LOCK command.\n"));
+ if (scsicmd->cmnd[4])
+ fsa_dev_ptr->locked[cid] = 1;
+ else
+ fsa_dev_ptr->locked[cid] = 0;
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+ return 0;
+ /*
+ * These commands are all No-Ops
+ */
+ case SS_TEST:
+ case SS_RESERV:
+ case SS_RELES:
+ case SS_REZERO:
+ case SS_REASGN:
+ case SS_SEEK:
+ case SS_ST_SP:
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+ return (0);
+ }
+
+ switch (scsicmd->cmnd[0])
+ {
+ case SS_READ:
+ case SM_READ:
+ /*
+ * Hack to keep track of ordinal number of the device that
+ * corresponds to a container. Needed to convert
+ * containers to /dev/sd device names
+ */
+
+ spin_unlock_irq(&io_request_lock);
+ fsa_dev_ptr->devno[cid] = DEVICE_NR(scsicmd->request.rq_dev);
+ ret = aac_read(scsicmd, cid);
+ spin_lock_irq(&io_request_lock);
+ return ret;
+
+ case SS_WRITE:
+ case SM_WRITE:
+ spin_unlock_irq(&io_request_lock);
+ ret = aac_write(scsicmd, cid);
+ spin_lock_irq(&io_request_lock);
+ return ret;
+ default:
+ /*
+ * Unhandled commands
+ */
+ printk(KERN_WARNING "Unhandled SCSI Command: 0x%x.\n",
+ scsicmd->cmnd[0]);
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 |
+ CHECK_CONDITION;
+ set_sense((u8 *) &sense_data[cid],
+ SENKEY_ILLEGAL, SENCODE_INVALID_COMMAND,
+ ASENCODE_INVALID_COMMAND, 0, 0, 0, 0);
+ __aac_io_done(scsicmd);
+ return -1;
+ }
+}
+
+static int query_disk(struct aac_dev *dev, void *arg)
+{
+ struct aac_query_disk qd;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+ if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk)))
+ return -EFAULT;
+ if (qd.cnum == -1)
+ qd.cnum = TARGET_LUN_TO_CONTAINER(qd.target, qd.lun);
+ else if ((qd.bus == -1) && (qd.target == -1) && (qd.lun == -1))
+ {
+ if (qd.cnum < 0 || qd.cnum >= MAXIMUM_NUM_CONTAINERS)
+ return -EINVAL;
+ qd.instance = dev->scsi_host_ptr->host_no;
+ qd.bus = 0;
+ qd.target = CONTAINER_TO_TARGET(qd.cnum);
+ qd.lun = CONTAINER_TO_LUN(qd.cnum);
+ }
+ else return -EINVAL;
+
+ qd.valid = fsa_dev_ptr->valid[qd.cnum];
+ qd.locked = fsa_dev_ptr->locked[qd.cnum];
+ qd.deleted = fsa_dev_ptr->deleted[qd.cnum];
+
+ if (fsa_dev_ptr->devno[qd.cnum] == -1)
+ qd.unmapped = 1;
+ else
+ qd.unmapped = 0;
+
+ get_sd_devname(fsa_dev_ptr->devno[qd.cnum], qd.name);
+
+ if (copy_to_user(arg, &qd, sizeof (struct aac_query_disk)))
+ return -EFAULT;
+ return 0;
+}
+
+static void get_sd_devname(int disknum, char *buffer)
+{
+ if (disknum < 0) {
+ sprintf(buffer, "%s", "");
+ return;
+ }
+
+ if (disknum < 26)
+ sprintf(buffer, "sd%c", 'a' + disknum);
+ else {
+ unsigned int min1;
+ unsigned int min2;
+ /*
+ * For larger numbers of disks, we need to go to a new
+ * naming scheme.
+ */
+ min1 = disknum / 26;
+ min2 = disknum % 26;
+ sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2);
+ }
+}
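+
+/*
+ * Examples: disknum 0 -> "sda", 25 -> "sdz", 26 -> "sdaa", 27 -> "sdab".
+ */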
+
+static int force_delete_disk(struct aac_dev *dev, void *arg)
+{
+ struct aac_delete_disk dd;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+
+ if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
+ return -EFAULT;
+
+ if (dd.cnum >= MAXIMUM_NUM_CONTAINERS)
+ return -EINVAL;
+ /*
+ * Mark this container as being deleted.
+ */
+ fsa_dev_ptr->deleted[dd.cnum] = 1;
+ /*
+ * Mark the container as no longer valid
+ */
+ fsa_dev_ptr->valid[dd.cnum] = 0;
+ return 0;
+}
+
+static int delete_disk(struct aac_dev *dev, void *arg)
+{
+ struct aac_delete_disk dd;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+
+ if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
+ return -EFAULT;
+
+ if (dd.cnum >= MAXIMUM_NUM_CONTAINERS)
+ return -EINVAL;
+ /*
+ * If the container is locked, it cannot be deleted by the API.
+ */
+ if (fsa_dev_ptr->locked[dd.cnum])
+ return -EBUSY;
+ else {
+ /*
+ * Mark the container as no longer being valid.
+ */
+ fsa_dev_ptr->valid[dd.cnum] = 0;
+ fsa_dev_ptr->devno[dd.cnum] = -1;
+ return 0;
+ }
+}
+
+int aac_dev_ioctl(struct aac_dev *dev, int cmd, void *arg)
+{
+ switch (cmd) {
+ case FSACTL_QUERY_DISK:
+ return query_disk(dev, arg);
+ case FSACTL_DELETE_DISK:
+ return delete_disk(dev, arg);
+ case FSACTL_FORCE_DELETE_DISK:
+ return force_delete_disk(dev, arg);
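+ /* 2131 is a raw ioctl number with no FSACTL_* name in this header;
+ * it simply forces a rescan of the containers. */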
+ case 2131:
+ return aac_get_containers(dev);
+ default:
+ return -ENOTTY;
+ }
+}
+
+/**
+ *
+ * aac_srb_callback
+ * @context: the context set in the fib - here it is scsi cmd
+ * @fibptr: pointer to the fib
+ *
+ * Handles the completion of a scsi command to a non-DASD device
+ *
+ */
+
+static void aac_srb_callback(void *context, struct fib * fibptr)
+{
+ struct aac_dev *dev;
+ struct aac_srb_reply *srbreply;
+ Scsi_Cmnd *scsicmd;
+
+ scsicmd = (Scsi_Cmnd *) context;
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+
+ if (fibptr == NULL)
+ BUG();
+
+ srbreply = (struct aac_srb_reply *) fib_data(fibptr);
+
+ scsicmd->sense_buffer[0] = '\0'; // initialize sense valid flag to false
+ // calculate resid for sg
+ scsicmd->resid = scsicmd->request_bufflen - srbreply->data_xfer_length;
+
+ if(scsicmd->use_sg)
+ pci_unmap_sg(dev->pdev,
+ (struct scatterlist *)scsicmd->buffer,
+ scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ else if(scsicmd->request_bufflen)
+ pci_unmap_single(dev->pdev, (ulong)scsicmd->SCp.ptr,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+
+ /*
+ * First check the fib status
+ */
+
+ if (le32_to_cpu(srbreply->status) != ST_OK){
+ int len;
+ printk(KERN_WARNING "aac_srb_callback: srb failed, status = %d\n",
+ le32_to_cpu(srbreply->status));
+ len = (srbreply->sense_data_size > sizeof(scsicmd->sense_buffer))?
+ sizeof(scsicmd->sense_buffer):srbreply->sense_data_size;
+ scsicmd->result = DID_ERROR << 16 | COMMAND_COMPLETE << 8 |
+ CHECK_CONDITION;
+ memcpy(scsicmd->sense_buffer, srbreply->sense_data, len);
+ }
+
+ /*
+ * Next check the srb status
+ */
+ switch(le32_to_cpu(srbreply->srb_status)){
+ case SRB_STATUS_ERROR_RECOVERY:
+ case SRB_STATUS_PENDING:
+ case SRB_STATUS_SUCCESS:
+ if(scsicmd->cmnd[0] == INQUIRY ){
+ u8 b;
+ /* We can't expose disk devices because we can't tell whether they
+ * are the raw container drives or standalone drives
+ */
+ b = *(u8*)scsicmd->buffer;
+ if( (b & 0x0f) == TYPE_DISK ){
+ scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
+ }
+ } else {
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
+ }
+ break;
+ case SRB_STATUS_DATA_OVERRUN:
+ switch(scsicmd->cmnd[0]){
+ case READ_6:
+ case WRITE_6:
+ case READ_10:
+ case WRITE_10:
+ case READ_12:
+ case WRITE_12:
+ if(le32_to_cpu(srbreply->data_xfer_length) < scsicmd->underflow ) {
+ printk(KERN_WARNING"aacraid: SCSI CMD underflow\n");
+ } else {
+ printk(KERN_WARNING"aacraid: SCSI CMD Data Overrun\n");
+ }
+ scsicmd->result = DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+ break;
+ default:
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
+ break;
+ }
+ break;
+ case SRB_STATUS_ABORTED:
+ scsicmd->result = DID_ABORT << 16 | ABORT << 8;
+ break;
+ case SRB_STATUS_ABORT_FAILED:
+ // Not sure about this one - but assuming the hba was trying
+ // to abort for some reason
+ scsicmd->result = DID_ERROR << 16 | ABORT << 8;
+ break;
+ case SRB_STATUS_PARITY_ERROR:
+ scsicmd->result = DID_PARITY << 16 | MSG_PARITY_ERROR << 8;
+ break;
+ case SRB_STATUS_NO_DEVICE:
+ case SRB_STATUS_INVALID_PATH_ID:
+ case SRB_STATUS_INVALID_TARGET_ID:
+ case SRB_STATUS_INVALID_LUN:
+ case SRB_STATUS_SELECTION_TIMEOUT:
+ scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
+ break;
+
+ case SRB_STATUS_COMMAND_TIMEOUT:
+ case SRB_STATUS_TIMEOUT:
+ scsicmd->result = DID_TIME_OUT << 16 | COMMAND_COMPLETE << 8;
+ break;
+
+ case SRB_STATUS_BUSY:
+ scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
+ break;
+
+ case SRB_STATUS_BUS_RESET:
+ scsicmd->result = DID_RESET << 16 | COMMAND_COMPLETE << 8;
+ break;
+
+ case SRB_STATUS_MESSAGE_REJECTED:
+ scsicmd->result = DID_ERROR << 16 | MESSAGE_REJECT << 8;
+ break;
+ case SRB_STATUS_REQUEST_FLUSHED:
+ case SRB_STATUS_ERROR:
+ case SRB_STATUS_INVALID_REQUEST:
+ case SRB_STATUS_REQUEST_SENSE_FAILED:
+ case SRB_STATUS_NO_HBA:
+ case SRB_STATUS_UNEXPECTED_BUS_FREE:
+ case SRB_STATUS_PHASE_SEQUENCE_FAILURE:
+ case SRB_STATUS_BAD_SRB_BLOCK_LENGTH:
+ case SRB_STATUS_DELAYED_RETRY:
+ case SRB_STATUS_BAD_FUNCTION:
+ case SRB_STATUS_NOT_STARTED:
+ case SRB_STATUS_NOT_IN_USE:
+ case SRB_STATUS_FORCE_ABORT:
+ case SRB_STATUS_DOMAIN_VALIDATION_FAIL:
+ default:
+#ifdef AAC_DETAILED_STATUS_INFO
+ printk("aacraid: SRB ERROR (%s)\n",
+ aac_get_status_string(le32_to_cpu(srbreply->srb_status)));
+#endif
+ scsicmd->result = DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+ break;
+ }
+ if (le32_to_cpu(srbreply->scsi_status) == 0x02 ){ // Check Condition
+ int len;
+ len = (srbreply->sense_data_size > sizeof(scsicmd->sense_buffer))?
+ sizeof(scsicmd->sense_buffer):srbreply->sense_data_size;
+ printk(KERN_WARNING "aac_srb_callback: check condition, "
+ "status = %d len=%d\n", le32_to_cpu(srbreply->status), len);
+ memcpy(scsicmd->sense_buffer, srbreply->sense_data, len);
+ }
+ /*
+ * OR in the scsi status (already shifted up a bit)
+ */
+ scsicmd->result |= le32_to_cpu(srbreply->scsi_status);
+
+ fib_complete(fibptr);
+ fib_free(fibptr);
+ aac_io_done(scsicmd);
+}
+
+/**
+ *
+ * aac_send_srb_fib
+ * @scsicmd: the scsi command block
+ *
+ * This routine will form a FIB and fill in the aac_srb from the
+ * scsicmd passed in.
+ */
+
+static int aac_send_srb_fib(Scsi_Cmnd* scsicmd)
+{
+ struct fib* cmd_fibcontext;
+ struct aac_dev* dev;
+ int status;
+ struct aac_srb *srbcmd;
+ u16 fibsize;
+ u32 flag;
+
+ if( scsicmd->target > 15 || scsicmd->lun > 7) {
+ scsicmd->result = DID_NO_CONNECT << 16;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ switch(scsicmd->sc_data_direction){
+ case SCSI_DATA_WRITE:
+ flag = SRB_DataOut;
+ break;
+ case SCSI_DATA_UNKNOWN:
+ flag = SRB_DataIn | SRB_DataOut;
+ break;
+ case SCSI_DATA_READ:
+ flag = SRB_DataIn;
+ break;
+ case SCSI_DATA_NONE:
+ default:
+ flag = SRB_NoDataXfer;
+ break;
+ }
+
+
+ /*
+ * Allocate and initialize a Fib then setup a BlockWrite command
+ */
+ if (!(cmd_fibcontext = fib_alloc(dev))) {
+ scsicmd->result = DID_ERROR << 16;
+ __aac_io_done(scsicmd);
+ return -1;
+ }
+ fib_init(cmd_fibcontext);
+
+ srbcmd = (struct aac_srb*) fib_data(cmd_fibcontext);
+ srbcmd->function = cpu_to_le32(SRBF_ExecuteScsi);
+ srbcmd->channel = cpu_to_le32(aac_logical_to_phys(scsicmd->channel));
+ srbcmd->target = cpu_to_le32(scsicmd->target);
+ srbcmd->lun = cpu_to_le32(scsicmd->lun);
+ srbcmd->flags = cpu_to_le32(flag);
+ srbcmd->timeout = cpu_to_le32(0); // timeout not used
+ srbcmd->retry_limit =cpu_to_le32(0); // Obsolete parameter
+ srbcmd->cdb_size = cpu_to_le32(scsicmd->cmd_len);
+
+ if( dev->pae_support ==1 ) {
+ aac_build_sg64(scsicmd, (struct sgmap64*) &srbcmd->sg);
+ srbcmd->count = cpu_to_le32(scsicmd->request_bufflen);
+
+ memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb));
+ memcpy(srbcmd->cdb, scsicmd->cmnd, scsicmd->cmd_len);
+ /*
+ * Build Scatter/Gather list
+ */
+ fibsize = sizeof (struct aac_srb) + (((srbcmd->sg.count & 0xff) - 1)
+ * sizeof (struct sgentry64));
+
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ScsiPortCommand64, cmd_fibcontext, fibsize,
+ FsaNormal, 0, 1, (fib_callback) aac_srb_callback,
+ (void *) scsicmd);
+ } else {
+ aac_build_sg(scsicmd, (struct sgmap*)&srbcmd->sg);
+ srbcmd->count = cpu_to_le32(scsicmd->request_bufflen);
+
+ memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb));
+ memcpy(srbcmd->cdb, scsicmd->cmnd, scsicmd->cmd_len);
+ /*
+ * Build Scatter/Gather list
+ */
+ fibsize = sizeof (struct aac_srb) + (((srbcmd->sg.count & 0xff) - 1)
+ * sizeof (struct sgentry));
+
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ScsiPortCommand, cmd_fibcontext, fibsize,
+ FsaNormal, 0, 1, (fib_callback) aac_srb_callback,
+ (void *) scsicmd);
+ }
+ /*
+ * Check that the command was queued to the controller
+ */
+ if (status == -EINPROGRESS){
+ return 0;
+ }
+
+ printk(KERN_WARNING "aac_srb: fib_send failed with status: %d\n", status);
+ /*
+ * For some reason the Fib didn't queue; return QUEUE_FULL
+ */
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | QUEUE_FULL;
+ __aac_io_done(scsicmd);
+
+ fib_complete(cmd_fibcontext);
+ fib_free(cmd_fibcontext);
+
+ return -1;
+}
+
+static unsigned long aac_build_sg(Scsi_Cmnd* scsicmd, struct sgmap* psg)
+{
+ struct aac_dev *dev;
+ unsigned long byte_count = 0;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ // Get rid of old data
+ psg->count = cpu_to_le32(0);
+ psg->sg[0].addr = cpu_to_le32(0);
+ psg->sg[0].count = cpu_to_le32(0);
+ if (scsicmd->use_sg) {
+ struct scatterlist *sg;
+ int i;
+ int sg_count;
+ sg = (struct scatterlist *) scsicmd->request_buffer;
+
+ sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ psg->count = cpu_to_le32(sg_count);
+
+ byte_count = 0;
+
+ for (i = 0; i < sg_count; i++) {
+ psg->sg[i].addr = cpu_to_le32(sg_dma_address(sg));
+ psg->sg[i].count = cpu_to_le32(sg_dma_len(sg));
+ byte_count += sg_dma_len(sg);
+ sg++;
+ }
+ /* hba wants the size to be exact */
+ if(byte_count > scsicmd->request_bufflen){
+ psg->sg[i-1].count -= (byte_count - scsicmd->request_bufflen);
+ byte_count = scsicmd->request_bufflen;
+ }
+ /* Check for command underflow */
+ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+ printk(KERN_WARNING"aacraid: cmd len %08lX cmd underflow %08X\n",
+ byte_count, scsicmd->underflow);
+ }
+ }
+ else if(scsicmd->request_bufflen) {
+ dma_addr_t addr;
+ addr = pci_map_single(dev->pdev,
+ scsicmd->request_buffer,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ psg->count = cpu_to_le32(1);
+ psg->sg[0].addr = cpu_to_le32(addr);
+ psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);
+ scsicmd->SCp.ptr = (void *)addr;
+ byte_count = scsicmd->request_bufflen;
+ }
+ return byte_count;
+}
+
+
+static unsigned long aac_build_sg64(Scsi_Cmnd* scsicmd, struct sgmap64* psg)
+{
+ struct aac_dev *dev;
+ unsigned long byte_count = 0;
+ u64 le_addr;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ // Get rid of old data
+ psg->count = cpu_to_le32(0);
+ psg->sg[0].addr[0] = cpu_to_le32(0);
+ psg->sg[0].addr[1] = cpu_to_le32(0);
+ psg->sg[0].count = cpu_to_le32(0);
+ if (scsicmd->use_sg) {
+ struct scatterlist *sg;
+ int i;
+ int sg_count;
+ sg = (struct scatterlist *) scsicmd->request_buffer;
+
+ sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ psg->count = cpu_to_le32(sg_count);
+
+ byte_count = 0;
+
+ for (i = 0; i < sg_count; i++) {
+ le_addr = cpu_to_le64(sg_dma_address(sg));
+ psg->sg[i].addr[1] = (u32)(le_addr>>32);
+ psg->sg[i].addr[0] = (u32)(le_addr & 0xffffffff);
+ psg->sg[i].count = cpu_to_le32(sg_dma_len(sg));
+ byte_count += sg_dma_len(sg);
+ sg++;
+ }
+ /* hba wants the size to be exact */
+ if(byte_count > scsicmd->request_bufflen){
+ psg->sg[i-1].count -= (byte_count - scsicmd->request_bufflen);
+ byte_count = scsicmd->request_bufflen;
+ }
+ /* Check for command underflow */
+ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+ printk(KERN_WARNING"aacraid: cmd len %08lX cmd underflow %08X\n",
+ byte_count, scsicmd->underflow);
+ }
+ }
+ else if(scsicmd->request_bufflen) {
+ dma_addr_t addr;
+ addr = pci_map_single(dev->pdev,
+ scsicmd->request_buffer,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ psg->count = cpu_to_le32(1);
+ le_addr = cpu_to_le64(addr);
+ psg->sg[0].addr[1] = (u32)(le_addr>>32);
+ psg->sg[0].addr[0] = (u32)(le_addr & 0xffffffff);
+ psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);
+ scsicmd->SCp.ptr = (void *)addr;
+ byte_count = scsicmd->request_bufflen;
+ }
+ return byte_count;
+}
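+
+/* Note: addr[0] carries the low and addr[1] the high 32 bits of the
+ * 64-bit DMA address, matching the struct sgentry64 layout. */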
+
+#ifdef AAC_DETAILED_STATUS_INFO
+
+struct aac_srb_status_info {
+ u32 status;
+ char *str;
+};
+
+
+static struct aac_srb_status_info srb_status_info[] = {
+ { SRB_STATUS_PENDING, "Pending Status"},
+ { SRB_STATUS_SUCCESS, "Success"},
+ { SRB_STATUS_ABORTED, "Aborted Command"},
+ { SRB_STATUS_ABORT_FAILED, "Abort Failed"},
+ { SRB_STATUS_ERROR, "Error Event"},
+ { SRB_STATUS_BUSY, "Device Busy"},
+ { SRB_STATUS_INVALID_REQUEST, "Invalid Request"},
+ { SRB_STATUS_INVALID_PATH_ID, "Invalid Path ID"},
+ { SRB_STATUS_NO_DEVICE, "No Device"},
+ { SRB_STATUS_TIMEOUT, "Timeout"},
+ { SRB_STATUS_SELECTION_TIMEOUT, "Selection Timeout"},
+ { SRB_STATUS_COMMAND_TIMEOUT, "Command Timeout"},
+ { SRB_STATUS_MESSAGE_REJECTED, "Message Rejected"},
+ { SRB_STATUS_BUS_RESET, "Bus Reset"},
+ { SRB_STATUS_PARITY_ERROR, "Parity Error"},
+ { SRB_STATUS_REQUEST_SENSE_FAILED,"Request Sense Failed"},
+ { SRB_STATUS_NO_HBA, "No HBA"},
+ { SRB_STATUS_DATA_OVERRUN, "Data Overrun/Data Underrun"},
+ { SRB_STATUS_UNEXPECTED_BUS_FREE,"Unexpected Bus Free"},
+ { SRB_STATUS_PHASE_SEQUENCE_FAILURE,"Phase Error"},
+ { SRB_STATUS_BAD_SRB_BLOCK_LENGTH,"Bad Srb Block Length"},
+ { SRB_STATUS_REQUEST_FLUSHED, "Request Flushed"},
+ { SRB_STATUS_DELAYED_RETRY, "Delayed Retry"},
+ { SRB_STATUS_INVALID_LUN, "Invalid LUN"},
+ { SRB_STATUS_INVALID_TARGET_ID, "Invalid TARGET ID"},
+ { SRB_STATUS_BAD_FUNCTION, "Bad Function"},
+ { SRB_STATUS_ERROR_RECOVERY, "Error Recovery"},
+ { SRB_STATUS_NOT_STARTED, "Not Started"},
+ { SRB_STATUS_NOT_IN_USE, "Not In Use"},
+ { SRB_STATUS_FORCE_ABORT, "Force Abort"},
+ { SRB_STATUS_DOMAIN_VALIDATION_FAIL,"Domain Validation Failure"},
+ { 0xff, "Unknown Error"}
+};
+
+char *aac_get_status_string(u32 status)
+{
+ int i;
+
+ for(i=0; i < (sizeof(srb_status_info)/sizeof(struct aac_srb_status_info)); i++ ){
+ if(srb_status_info[i].status == status){
+ return srb_status_info[i].str;
+ }
+ }
+
+ return "Bad Status Code";
+}
+
+#endif
diff --git a/xen/drivers/scsi/aacraid/aacraid.h b/xen/drivers/scsi/aacraid/aacraid.h
new file mode 100644
index 0000000000..1f9838436d
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/aacraid.h
@@ -0,0 +1,1420 @@
+
+/* Debug printing: use the commented-out definition to enable dprintk() */
+// #define dprintk(x) printk x
+#define dprintk(x)
+
+
+#include <asm/byteorder.h>
+
+#define TRY_TASKLET
+#ifdef TRY_TASKLET
+/* XXX SMH: trying to use softirqs to trigger stuff done prev by threads */
+#include <xeno/interrupt.h> /* for tasklet/softirq stuff */
+#endif
+
+/*------------------------------------------------------------------------------
+ * D E F I N E S
+ *----------------------------------------------------------------------------*/
+
+#define MAXIMUM_NUM_CONTAINERS 31
+#define MAXIMUM_NUM_ADAPTERS 8
+
+#define AAC_NUM_FIB 578
+#define AAC_NUM_IO_FIB 512
+
+#define AAC_MAX_TARGET (MAXIMUM_NUM_CONTAINERS+1)
+//#define AAC_MAX_TARGET (16)
+#define AAC_MAX_LUN (8)
+
+/*
+ * These macros convert from physical channels to virtual channels
+ */
+#define CONTAINER_CHANNEL (0)
+#define aac_phys_to_logical(x) (x+1)
+#define aac_logical_to_phys(x) (x?x-1:0)
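+/* e.g. aac_phys_to_logical(0) == 1 and aac_logical_to_phys(1) == 0;
+ * logical channel 0 (CONTAINER_CHANNEL) is reserved for containers. */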
+
+#define AAC_DETAILED_STATUS_INFO
+
+struct diskparm
+{
+ int heads;
+ int sectors;
+ int cylinders;
+};
+
+
+/*
+ * DON'T CHANGE THE ORDER, this is set by the firmware
+ */
+
+#define CT_NONE 0
+#define CT_VOLUME 1
+#define CT_MIRROR 2
+#define CT_STRIPE 3
+#define CT_RAID5 4
+#define CT_SSRW 5
+#define CT_SSRO 6
+#define CT_MORPH 7
+#define CT_PASSTHRU 8
+#define CT_RAID4 9
+#define CT_RAID10 10 /* stripe of mirror */
+#define CT_RAID00 11 /* stripe of stripe */
+#define CT_VOLUME_OF_MIRRORS 12 /* volume of mirror */
+#define CT_PSEUDO_RAID 13 /* really raid4 */
+#define CT_LAST_VOLUME_TYPE 14
+
+/*
+ * Types of objects addressable in some fashion by the client.
+ * This is a superset of those objects handled just by the filesystem
+ * and includes "raw" objects that an administrator would use to
+ * configure containers and filesystems.
+ */
+
+#define FT_REG 1 /* regular file */
+#define FT_DIR 2 /* directory */
+#define FT_BLK 3 /* "block" device - reserved */
+#define FT_CHR 4 /* "character special" device - reserved */
+#define FT_LNK 5 /* symbolic link */
+#define FT_SOCK 6 /* socket */
+#define FT_FIFO 7 /* fifo */
+#define FT_FILESYS 8 /* ADAPTEC's "FSA"(tm) filesystem */
+#define FT_DRIVE 9 /* physical disk - addressable in scsi by bus/target/lun */
+#define FT_SLICE 10 /* virtual disk - raw volume - slice */
+#define FT_PARTITION 11 /* FSA partition - carved out of a slice - building block for containers */
+#define FT_VOLUME 12 /* Container - Volume Set */
+#define FT_STRIPE 13 /* Container - Stripe Set */
+#define FT_MIRROR 14 /* Container - Mirror Set */
+#define FT_RAID5 15 /* Container - Raid 5 Set */
+#define FT_DATABASE 16 /* Storage object with "foreign" content manager */
+
+/*
+ * Host side memory scatter gather list
+ * Used by the adapter for read, write, and readdirplus operations
+ * We have separate 32 and 64 bit versions because even
+ * on 64 bit systems not all cards support the 64 bit version
+ */
+struct sgentry {
+ u32 addr; /* 32-bit address. */
+ u32 count; /* Length. */
+};
+
+struct sgentry64 {
+ u32 addr[2]; /* 64-bit addr. 2 pieces for data alignment */
+ u32 count; /* Length. */
+};
+
+/*
+ * SGMAP
+ *
+ * This is the SGMAP structure for all commands that use
+ * 32-bit addressing.
+ */
+
+struct sgmap {
+ u32 count;
+ struct sgentry sg[1];
+};
+
+struct sgmap64 {
+ u32 count;
+ struct sgentry64 sg[1];
+};
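+
+/*
+ * sg[1] is the old C idiom for a variable-length tail: a map carrying N
+ * entries is sized as sizeof(struct sgmap) + (N - 1) * sizeof(struct
+ * sgentry), which is exactly how aachba.c computes its FIB sizes.
+ */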
+
+struct creation_info
+{
+ u8 buildnum; /* e.g., 588 */
+ u8 usec; /* e.g., 588 */
+ u8 via; /* e.g., 1 = FSU,
+ * 2 = API
+ */
+ u8 year; /* e.g., 1997 = 97 */
+ u32 date; /*
+ * unsigned Month :4; // 1 - 12
+ * unsigned Day :6; // 1 - 32
+ * unsigned Hour :6; // 0 - 23
+ * unsigned Minute :6; // 0 - 60
+ * unsigned Second :6; // 0 - 60
+ */
+ u32 serial[2]; /* e.g., 0x1DEADB0BFAFAF001 */
+};
+
+
+/*
+ * Define all the constants needed for the communication interface
+ */
+
+/*
+ * Define how many queue entries each queue will have and the total
+ * number of entries for the entire communication interface. Also define
+ * how many queues we support.
+ *
+ * This has to match the controller
+ */
+
+#define NUMBER_OF_COMM_QUEUES 8 // 4 command; 4 response
+#define HOST_HIGH_CMD_ENTRIES 4
+#define HOST_NORM_CMD_ENTRIES 8
+#define ADAP_HIGH_CMD_ENTRIES 4
+#define ADAP_NORM_CMD_ENTRIES 512
+#define HOST_HIGH_RESP_ENTRIES 4
+#define HOST_NORM_RESP_ENTRIES 512
+#define ADAP_HIGH_RESP_ENTRIES 4
+#define ADAP_NORM_RESP_ENTRIES 8
+
+#define TOTAL_QUEUE_ENTRIES \
+ (HOST_NORM_CMD_ENTRIES + HOST_HIGH_CMD_ENTRIES + ADAP_NORM_CMD_ENTRIES + ADAP_HIGH_CMD_ENTRIES + \
+ HOST_NORM_RESP_ENTRIES + HOST_HIGH_RESP_ENTRIES + ADAP_NORM_RESP_ENTRIES + ADAP_HIGH_RESP_ENTRIES)
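+/* With the entry counts above this works out to 1056 queue entries. */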
+
+
+/*
+ * Set the queues on a 16 byte alignment
+ */
+
+#define QUEUE_ALIGNMENT 16
+
+/*
+ * The queue headers define the Communication Region queues. These
+ * are physically contiguous and accessible by both the adapter and the
+ * host. Even though all queue headers are in the same contiguous block
+ * they will be represented as individual units in the data structures.
+ */
+
+struct aac_entry {
+ u32 size; /* Size in bytes of Fib which this QE points to */
+ u32 addr; /* Receiver address of the FIB */
+};
+
+/*
+ * The adapter assumes the ProducerIndex and ConsumerIndex are grouped
+ * adjacently and in that order.
+ */
+
+struct aac_qhdr {
+ u64 header_addr; /* Address to hand the adapter to access to this queue head */
+ u32 *producer; /* The producer index for this queue (host address) */
+ u32 *consumer; /* The consumer index for this queue (host address) */
+};
+
+/*
+ * Define all the events which the adapter would like to notify
+ * the host of.
+ */
+
+#define HostNormCmdQue 1 /* Change in host normal priority command queue */
+#define HostHighCmdQue 2 /* Change in host high priority command queue */
+#define HostNormRespQue 3 /* Change in host normal priority response queue */
+#define HostHighRespQue 4 /* Change in host high priority response queue */
+#define AdapNormRespNotFull 5
+#define AdapHighRespNotFull 6
+#define AdapNormCmdNotFull 7
+#define AdapHighCmdNotFull 8
+#define SynchCommandComplete 9
+#define AdapInternalError 0xfe /* The adapter detected an internal error; shutting down */
+
+/*
+ * Define all the events the host wishes to notify the
+ * adapter of. The first four values must match the Qid of the
+ * corresponding queue.
+ */
+
+#define AdapNormCmdQue 2
+#define AdapHighCmdQue 3
+#define AdapNormRespQue 6
+#define AdapHighRespQue 7
+#define HostShutdown 8
+#define HostPowerFail 9
+#define FatalCommError 10
+#define HostNormRespNotFull 11
+#define HostHighRespNotFull 12
+#define HostNormCmdNotFull 13
+#define HostHighCmdNotFull 14
+#define FastIo 15
+#define AdapPrintfDone 16
+
+/*
+ * Define all the queues that the adapter and host use to communicate
+ * Number them to match the physical queue layout.
+ */
+
+enum aac_queue_types {
+ HostNormCmdQueue = 0, /* Adapter to host normal priority command traffic */
+ HostHighCmdQueue, /* Adapter to host high priority command traffic */
+ AdapNormCmdQueue, /* Host to adapter normal priority command traffic */
+ AdapHighCmdQueue, /* Host to adapter high priority command traffic */
+ HostNormRespQueue, /* Adapter to host normal priority response traffic */
+ HostHighRespQueue, /* Adapter to host high priority response traffic */
+ AdapNormRespQueue, /* Host to adapter normal priority response traffic */
+ AdapHighRespQueue /* Host to adapter high priority response traffic */
+};
+
+/*
+ * Assign type values to the FSA communication data structures
+ */
+
+#define FIB_MAGIC 0x0001
+
+/*
+ * Define the priority levels the FSA communication routines support.
+ */
+
+#define FsaNormal 1
+#define FsaHigh 2
+
+/*
+ * Define the FIB. The FIB is where all the requested data and
+ * command information are passed to the application on the FSA adapter.
+ */
+
+struct aac_fibhdr {
+ u32 XferState; // Current transfer state for this CCB
+ u16 Command; // Routing information for the destination
+ u8 StructType; // Type FIB
+ u8 Flags; // Flags for FIB
+ u16 Size; // Size of this FIB in bytes
+ u16 SenderSize; // Size of the FIB in the sender (for
+ // response sizing)
+ u32 SenderFibAddress; // Host defined data in the FIB
+ u32 ReceiverFibAddress; // Logical address of this FIB for the adapter
+ u32 SenderData; // Place holder for the sender to store data
+ union {
+ struct {
+ u32 _ReceiverTimeStart; // Timestamp for receipt of fib
+ u32 _ReceiverTimeDone; // Timestamp for completion of fib
+ } _s;
+ struct list_head _FibLinks; // Used to link Adapter Initiated
+ // Fibs on the host
+ } _u;
+};
+
+#define FibLinks _u._FibLinks
+
+#define FIB_DATA_SIZE_IN_BYTES (512 - sizeof(struct aac_fibhdr))
+
+
+struct hw_fib {
+ struct aac_fibhdr header;
+ u8 data[FIB_DATA_SIZE_IN_BYTES]; // Command specific data
+};
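+
+/*
+ * Size check (a sketch, assuming the usual 32-bit layout in which struct
+ * aac_fibhdr packs to 32 bytes): FIB_DATA_SIZE_IN_BYTES is then
+ * 512 - 32 = 480, so a struct hw_fib is exactly the 512 bytes exchanged
+ * with the adapter.
+ */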
+
+/*
+ * FIB commands
+ */
+
+#define TestCommandResponse 1
+#define TestAdapterCommand 2
+/*
+ * Lowlevel and comm commands
+ */
+#define LastTestCommand 100
+#define ReinitHostNormCommandQueue 101
+#define ReinitHostHighCommandQueue 102
+#define ReinitHostHighRespQueue 103
+#define ReinitHostNormRespQueue 104
+#define ReinitAdapNormCommandQueue 105
+#define ReinitAdapHighCommandQueue 107
+#define ReinitAdapHighRespQueue 108
+#define ReinitAdapNormRespQueue 109
+#define InterfaceShutdown 110
+#define DmaCommandFib 120
+#define StartProfile 121
+#define TermProfile 122
+#define SpeedTest 123
+#define TakeABreakPt 124
+#define RequestPerfData 125
+#define SetInterruptDefTimer 126
+#define SetInterruptDefCount 127
+#define GetInterruptDefStatus 128
+#define LastCommCommand 129
+/*
+ * Filesystem commands
+ */
+#define NuFileSystem 300
+#define UFS 301
+#define HostFileSystem 302
+#define LastFileSystemCommand 303
+/*
+ * Container Commands
+ */
+#define ContainerCommand 500
+#define ContainerCommand64 501
+/*
+ * Cluster Commands
+ */
+#define ClusterCommand 550
+/*
+ * Scsi Port commands (scsi passthrough)
+ */
+#define ScsiPortCommand 600
+#define ScsiPortCommand64 601
+/*
+ * Misc house keeping and generic adapter initiated commands
+ */
+#define AifRequest 700
+#define CheckRevision 701
+#define FsaHostShutdown 702
+#define RequestAdapterInfo 703
+#define IsAdapterPaused 704
+#define SendHostTime 705
+#define LastMiscCommand 706
+
+//
+// Commands that will target the failover level on the FSA adapter
+//
+
+enum fib_xfer_state {
+ HostOwned = (1<<0),
+ AdapterOwned = (1<<1),
+ FibInitialized = (1<<2),
+ FibEmpty = (1<<3),
+ AllocatedFromPool = (1<<4),
+ SentFromHost = (1<<5),
+ SentFromAdapter = (1<<6),
+ ResponseExpected = (1<<7),
+ NoResponseExpected = (1<<8),
+ AdapterProcessed = (1<<9),
+ HostProcessed = (1<<10),
+ HighPriority = (1<<11),
+ NormalPriority = (1<<12),
+ Async = (1<<13),
+ AsyncIo = (1<<13), // rpbfix: remove with new regime
+ PageFileIo = (1<<14), // rpbfix: remove with new regime
+ ShutdownRequest = (1<<15),
+ LazyWrite = (1<<16), // rpbfix: remove with new regime
+ AdapterMicroFib = (1<<17),
+ BIOSFibPath = (1<<18),
+ FastResponseCapable = (1<<19),
+	ApiFib = (1<<20)	// It's an API fib.
+};
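+
+/*
+ * For reference: fib_init() in commsup.c starts a host-issued FIB with
+ * XferState = HostOwned | FibInitialized | FibEmpty | FastResponseCapable;
+ * the remaining bits are flipped as the FIB moves between host and adapter.
+ */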
+
+/*
+ * The following define needs to be updated any time there is an
+ * incompatible change made to the aac_init structure.
+ */
+
+#define ADAPTER_INIT_STRUCT_REVISION 3
+
+struct aac_init
+{
+ u32 InitStructRevision;
+ u32 MiniPortRevision;
+ u32 fsrev;
+ u32 CommHeaderAddress;
+ u32 FastIoCommAreaAddress;
+ u32 AdapterFibsPhysicalAddress;
+ u32 AdapterFibsVirtualAddress;
+ u32 AdapterFibsSize;
+ u32 AdapterFibAlign;
+ u32 printfbuf;
+ u32 printfbufsiz;
+ u32 HostPhysMemPages; // number of 4k pages of host physical memory
+ u32 HostElapsedSeconds; // number of seconds since 1970.
+};
+
+enum aac_log_level {
+ LOG_INIT = 10,
+ LOG_INFORMATIONAL = 20,
+ LOG_WARNING = 30,
+ LOG_LOW_ERROR = 40,
+ LOG_MEDIUM_ERROR = 50,
+ LOG_HIGH_ERROR = 60,
+ LOG_PANIC = 70,
+ LOG_DEBUG = 80,
+ LOG_WINDBG_PRINT = 90
+};
+
+#define FSAFS_NTC_GET_ADAPTER_FIB_CONTEXT 0x030b
+#define FSAFS_NTC_FIB_CONTEXT 0x030c
+
+struct aac_dev;
+
+struct adapter_ops
+{
+ void (*adapter_interrupt)(struct aac_dev *dev);
+ void (*adapter_notify)(struct aac_dev *dev, u32 event);
+ void (*adapter_enable_int)(struct aac_dev *dev, u32 event);
+ void (*adapter_disable_int)(struct aac_dev *dev, u32 event);
+ int (*adapter_sync_cmd)(struct aac_dev *dev, u32 command, u32 p1, u32 *status);
+};
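+
+/*
+ * These hooks are expected to be filled in by the card-specific init
+ * routines (aac_rx_init()/aac_sa_init() below) and are invoked through
+ * the aac_adapter_*() wrapper macros defined later in this header.
+ */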
+
+/*
+ * Define which interrupt handler needs to be installed
+ */
+
+struct aac_driver_ident
+{
+ u16 vendor;
+ u16 device;
+ u16 subsystem_vendor;
+ u16 subsystem_device;
+ int (*init)(struct aac_dev *dev, unsigned long num);
+ char * name;
+ char * vname;
+ char * model;
+ u16 channels;
+};
+
+/*
+ * The adapter interface specifies that all queues be located in the same
+ * physically contiguous block. The host structure that defines the
+ * communication queues assumes they are each a separate physically
+ * contiguous memory region, while still supporting them all being one big
+ * contiguous block.
+ * There is a command and response queue for each priority level and
+ * direction of communication. These regions are accessed by both the host
+ * and adapter.
+ */
+
+struct aac_queue {
+ u64 logical; /* This is the address we give the adapter */
+ struct aac_entry *base; /* This is the system virtual address */
+ struct aac_qhdr headers; /* A pointer to the producer and consumer queue headers for this queue */
+ u32 entries; /* Number of queue entries on this queue */
+#if 0
+ wait_queue_head_t qfull; /* Event to wait on if the queue is full */
+ wait_queue_head_t cmdready; /* Indicates there is a Command ready from the adapter on this queue. */
+#endif
+ /* This is only valid for adapter to host command queues. */
+	spinlock_t *lock;	/* Spinlock for this queue; must be taken before accessing the queue */
+ spinlock_t lockdata; /* Actual lock (used only on one side of the lock) */
+ unsigned long SavedIrql; /* Previous IRQL when the spin lock is taken */
+ u32 padding; /* Padding - FIXME - can remove I believe */
+	struct list_head cmdq;	/* A queue of FIBs which need to be processed by the FS thread. This is */
+ /* only valid for command queues which receive entries from the adapter. */
+ struct list_head pendingq; /* A queue of outstanding fib's to the adapter. */
+ unsigned long numpending; /* Number of entries on outstanding queue. */
+ struct aac_dev * dev; /* Back pointer to adapter structure */
+};
+
+/*
+ * Message queues. The order here is important, see also the
+ * queue type ordering
+ */
+
+struct aac_queue_block
+{
+ struct aac_queue queue[8];
+};
+
+/*
+ * SaP1 Message Unit Registers
+ */
+
+struct sa_drawbridge_CSR {
+ // Offset | Name
+ u32 reserved[10]; // 00h-27h | Reserved
+	u8	LUT_Offset;	// 28h	| Lookup Table Offset
+ u8 reserved1[3]; // 29h-2bh | Reserved
+	u32	LUT_Data;	// 2ch	| Lookup Table Data
+ u32 reserved2[26]; // 30h-97h | Reserved
+ u16 PRICLEARIRQ; // 98h | Primary Clear Irq
+ u16 SECCLEARIRQ; // 9ah | Secondary Clear Irq
+ u16 PRISETIRQ; // 9ch | Primary Set Irq
+ u16 SECSETIRQ; // 9eh | Secondary Set Irq
+ u16 PRICLEARIRQMASK; // a0h | Primary Clear Irq Mask
+ u16 SECCLEARIRQMASK; // a2h | Secondary Clear Irq Mask
+ u16 PRISETIRQMASK; // a4h | Primary Set Irq Mask
+ u16 SECSETIRQMASK; // a6h | Secondary Set Irq Mask
+ u32 MAILBOX0; // a8h | Scratchpad 0
+ u32 MAILBOX1; // ach | Scratchpad 1
+ u32 MAILBOX2; // b0h | Scratchpad 2
+ u32 MAILBOX3; // b4h | Scratchpad 3
+ u32 MAILBOX4; // b8h | Scratchpad 4
+ u32 MAILBOX5; // bch | Scratchpad 5
+ u32 MAILBOX6; // c0h | Scratchpad 6
+ u32 MAILBOX7; // c4h | Scratchpad 7
+
+ u32 ROM_Setup_Data; // c8h | Rom Setup and Data
+ u32 ROM_Control_Addr; // cch | Rom Control and Address
+
+ u32 reserved3[12]; // d0h-ffh | reserved
+ u32 LUT[64]; // 100h-1ffh| Lookup Table Entries
+
+ //
+	// TODO
+	//	need to add DMA, I2O, UART, etc. registers from 80h to 364h
+ //
+
+};
+
+#define Mailbox0 SaDbCSR.MAILBOX0
+#define Mailbox1 SaDbCSR.MAILBOX1
+#define Mailbox2 SaDbCSR.MAILBOX2
+#define Mailbox3 SaDbCSR.MAILBOX3
+#define Mailbox4 SaDbCSR.MAILBOX4
+#define Mailbox5 SaDbCSR.MAILBOX5
+#define Mailbox7 SaDbCSR.MAILBOX7
+
+#define DoorbellReg_p SaDbCSR.PRISETIRQ
+#define DoorbellReg_s SaDbCSR.SECSETIRQ
+#define DoorbellClrReg_p SaDbCSR.PRICLEARIRQ
+
+
+#define DOORBELL_0 cpu_to_le16(0x0001)
+#define DOORBELL_1 cpu_to_le16(0x0002)
+#define DOORBELL_2 cpu_to_le16(0x0004)
+#define DOORBELL_3 cpu_to_le16(0x0008)
+#define DOORBELL_4 cpu_to_le16(0x0010)
+#define DOORBELL_5 cpu_to_le16(0x0020)
+#define DOORBELL_6 cpu_to_le16(0x0040)
+
+
+#define PrintfReady DOORBELL_5
+#define PrintfDone DOORBELL_5
+
+struct sa_registers {
+ struct sa_drawbridge_CSR SaDbCSR; /* 98h - c4h */
+};
+
+
+#define Sa_MINIPORT_REVISION 1
+
+#define sa_readw(AEP, CSR) readl(&((AEP)->regs.sa->CSR))
+#define sa_readl(AEP, CSR) readl(&((AEP)->regs.sa->CSR))
+#define sa_writew(AEP, CSR, value) writew(value, &((AEP)->regs.sa->CSR))
+#define sa_writel(AEP, CSR, value) writel(value, &((AEP)->regs.sa->CSR))
+
+/*
+ * Rx Message Unit Registers
+ */
+
+struct rx_mu_registers {
+ // Local | PCI* | Name
+ // | |
+ u32 ARSR; // 1300h | 00h | APIC Register Select Register
+ u32 reserved0; // 1304h | 04h | Reserved
+ u32 AWR; // 1308h | 08h | APIC Window Register
+ u32 reserved1; // 130Ch | 0Ch | Reserved
+ u32 IMRx[2]; // 1310h | 10h | Inbound Message Registers
+ u32 OMRx[2]; // 1318h | 18h | Outbound Message Registers
+ u32 IDR; // 1320h | 20h | Inbound Doorbell Register
+ u32 IISR; // 1324h | 24h | Inbound Interrupt Status Register
+ u32 IIMR; // 1328h | 28h | Inbound Interrupt Mask Register
+ u32 ODR; // 132Ch | 2Ch | Outbound Doorbell Register
+ u32 OISR; // 1330h | 30h | Outbound Interrupt Status Register
+ u32 OIMR; // 1334h | 34h | Outbound Interrupt Mask Register
+ // * Must access through ATU Inbound Translation Window
+};
+
+struct rx_inbound {
+ u32 Mailbox[8];
+};
+
+#define InboundMailbox0 IndexRegs.Mailbox[0]
+#define InboundMailbox1 IndexRegs.Mailbox[1]
+#define InboundMailbox2 IndexRegs.Mailbox[2]
+#define InboundMailbox3 IndexRegs.Mailbox[3]
+#define InboundMailbox4 IndexRegs.Mailbox[4]
+
+#define INBOUNDDOORBELL_0 cpu_to_le32(0x00000001)
+#define INBOUNDDOORBELL_1 cpu_to_le32(0x00000002)
+#define INBOUNDDOORBELL_2 cpu_to_le32(0x00000004)
+#define INBOUNDDOORBELL_3 cpu_to_le32(0x00000008)
+#define INBOUNDDOORBELL_4 cpu_to_le32(0x00000010)
+#define INBOUNDDOORBELL_5 cpu_to_le32(0x00000020)
+#define INBOUNDDOORBELL_6 cpu_to_le32(0x00000040)
+
+#define OUTBOUNDDOORBELL_0 cpu_to_le32(0x00000001)
+#define OUTBOUNDDOORBELL_1 cpu_to_le32(0x00000002)
+#define OUTBOUNDDOORBELL_2 cpu_to_le32(0x00000004)
+#define OUTBOUNDDOORBELL_3 cpu_to_le32(0x00000008)
+#define OUTBOUNDDOORBELL_4 cpu_to_le32(0x00000010)
+
+#define InboundDoorbellReg MUnit.IDR
+#define OutboundDoorbellReg MUnit.ODR
+
+struct rx_registers {
+ struct rx_mu_registers MUnit; // 1300h - 1334h
+ u32 reserved1[6]; // 1338h - 134ch
+ struct rx_inbound IndexRegs;
+};
+
+#define rx_readb(AEP, CSR) readb(&((AEP)->regs.rx->CSR))
+#define rx_readl(AEP, CSR) readl(&((AEP)->regs.rx->CSR))
+#define rx_writeb(AEP, CSR, value) writeb(value, &((AEP)->regs.rx->CSR))
+#define rx_writel(AEP, CSR, value) writel(value, &((AEP)->regs.rx->CSR))
+
+struct fib;
+
+typedef void (*fib_callback)(void *ctxt, struct fib *fibctx);
+
+struct aac_fib_context {
+ s16 type; // used for verification of structure
+ s16 size;
+ ulong jiffies; // used for cleanup - dmb changed to ulong
+ struct list_head next; // used to link context's into a linked list
+#if 0
+ struct semaphore wait_sem; // this is used to wait for the next fib to arrive.
+#endif
+ int wait; // Set to true when thread is in WaitForSingleObject
+ unsigned long count; // total number of FIBs on FibList
+ struct list_head fibs;
+};
+
+struct fsa_scsi_hba {
+ u32 size[MAXIMUM_NUM_CONTAINERS];
+ u32 type[MAXIMUM_NUM_CONTAINERS];
+ u8 valid[MAXIMUM_NUM_CONTAINERS];
+ u8 ro[MAXIMUM_NUM_CONTAINERS];
+ u8 locked[MAXIMUM_NUM_CONTAINERS];
+ u8 deleted[MAXIMUM_NUM_CONTAINERS];
+ u32 devno[MAXIMUM_NUM_CONTAINERS];
+};
+
+struct fib {
+ void *next; /* this is used by the allocator */
+ s16 type;
+ s16 size;
+ /*
+ * The Adapter that this I/O is destined for.
+ */
+ struct aac_dev *dev;
+ u64 logicaladdr; /* 64 bit */
+#if 0
+ /*
+ * This is the event the sendfib routine will wait on if the
+ * caller did not pass one and this is synch io.
+ */
+ struct semaphore event_wait;
+#endif
+ spinlock_t event_lock;
+
+ u32 done; /* gets set to 1 when fib is complete */
+ fib_callback callback;
+ void *callback_data;
+ u32 flags; // u32 dmb was ulong
+ /*
+ * The following is used to put this fib context onto the
+ * Outstanding I/O queue.
+ */
+ struct list_head queue;
+
+ void *data;
+ struct hw_fib *fib; /* Actual shared object */
+};
+
+/*
+ * Adapter Information Block
+ *
+ * This is returned by the RequestAdapterInfo block
+ */
+
+struct aac_adapter_info
+{
+ u32 platform;
+ u32 cpu;
+ u32 subcpu;
+ u32 clock;
+ u32 execmem;
+ u32 buffermem;
+ u32 totalmem;
+ u32 kernelrev;
+ u32 kernelbuild;
+ u32 monitorrev;
+ u32 monitorbuild;
+ u32 hwrev;
+ u32 hwbuild;
+ u32 biosrev;
+ u32 biosbuild;
+ u32 cluster;
+ u32 serial[2];
+ u32 battery;
+ u32 options;
+ u32 OEM;
+};
+
+/*
+ * Battery platforms
+ */
+#define AAC_BAT_REQ_PRESENT (1)
+#define AAC_BAT_REQ_NOTPRESENT (2)
+#define AAC_BAT_OPT_PRESENT (3)
+#define AAC_BAT_OPT_NOTPRESENT (4)
+#define AAC_BAT_NOT_SUPPORTED (5)
+/*
+ * cpu types
+ */
+#define AAC_CPU_SIMULATOR (1)
+#define AAC_CPU_I960 (2)
+#define AAC_CPU_STRONGARM (3)
+
+/*
+ * Supported Options
+ */
+#define AAC_OPT_SNAPSHOT cpu_to_le32(1)
+#define AAC_OPT_CLUSTERS cpu_to_le32(1<<1)
+#define AAC_OPT_WRITE_CACHE cpu_to_le32(1<<2)
+#define AAC_OPT_64BIT_DATA cpu_to_le32(1<<3)
+#define AAC_OPT_HOST_TIME_FIB cpu_to_le32(1<<4)
+#define AAC_OPT_RAID50 cpu_to_le32(1<<5)
+#define AAC_OPT_4GB_WINDOW cpu_to_le32(1<<6)
+#define AAC_OPT_SCSI_UPGRADEABLE cpu_to_le32(1<<7)
+#define AAC_OPT_SOFT_ERR_REPORT cpu_to_le32(1<<8)
+#define AAC_OPT_SUPPORTED_RECONDITION cpu_to_le32(1<<9)
+#define AAC_OPT_SGMAP_HOST64 cpu_to_le32(1<<10)
+#define AAC_OPT_ALARM cpu_to_le32(1<<11)
+#define AAC_OPT_NONDASD cpu_to_le32(1<<12)
+
+struct aac_dev
+{
+ struct aac_dev *next;
+ const char *name;
+ int id;
+
+ u16 irq_mask;
+ /*
+ * Map for 128 fib objects (64k)
+ */
+ dma_addr_t hw_fib_pa;
+ struct hw_fib *hw_fib_va;
+#if BITS_PER_LONG >= 64
+ ulong fib_base_va;
+#endif
+ /*
+ * Fib Headers
+ */
+ struct fib fibs[AAC_NUM_FIB];
+ struct fib *free_fib;
+ struct fib *timeout_fib;
+ spinlock_t fib_lock;
+
+ struct aac_queue_block *queues;
+ /*
+ * The user API will use an IOCTL to register itself to receive
+ * FIBs from the adapter. The following list is used to keep
+ * track of all the threads that have requested these FIBs. The
+ * mutex is used to synchronize access to all data associated
+ * with the adapter fibs.
+ */
+ struct list_head fib_list;
+
+ struct adapter_ops a_ops;
+ unsigned long fsrev; /* Main driver's revision number */
+
+ struct aac_init *init; /* Holds initialization info to communicate with adapter */
+ dma_addr_t init_pa; /* Holds physical address of the init struct */
+
+ struct pci_dev *pdev; /* Our PCI interface */
+ void * printfbuf; /* pointer to buffer used for printf's from the adapter */
+ void * comm_addr; /* Base address of Comm area */
+ dma_addr_t comm_phys; /* Physical Address of Comm area */
+ size_t comm_size;
+
+ struct Scsi_Host *scsi_host_ptr;
+ struct fsa_scsi_hba fsa_dev;
+ int thread_pid;
+ int cardtype;
+
+ /*
+ * The following is the device specific extension.
+ */
+ union
+ {
+ struct sa_registers *sa;
+ struct rx_registers *rx;
+ } regs;
+ /*
+ * The following is the number of the individual adapter
+ */
+ u32 devnum;
+ u32 aif_thread;
+#if 0
+ struct completion aif_completion;
+#endif
+ struct aac_adapter_info adapter_info;
+	/* These are in adapter info, but they are in the I/O flow, so
+	 * let's break them out so we don't have to do an AND to check them
+	 */
+ */
+ u8 nondasd_support;
+ u8 pae_support;
+};
+
+#define aac_adapter_interrupt(dev) \
+ dev->a_ops.adapter_interrupt(dev)
+
+#define aac_adapter_notify(dev, event) \
+ dev->a_ops.adapter_notify(dev, event)
+
+#define aac_adapter_enable_int(dev, event) \
+ dev->a_ops.adapter_enable_int(dev, event)
+
+#define aac_adapter_disable_int(dev, event) \
+ dev->a_ops.adapter_disable_int(dev, event)
+
+
+
+#define FIB_CONTEXT_FLAG_TIMED_OUT (0x00000001)
+
+/*
+ * Define the command values
+ */
+
+#define Null 0
+#define GetAttributes 1
+#define SetAttributes 2
+#define Lookup 3
+#define ReadLink 4
+#define Read 5
+#define Write 6
+#define Create 7
+#define MakeDirectory 8
+#define SymbolicLink 9
+#define MakeNode 10
+#define Removex 11
+#define RemoveDirectoryx 12
+#define Rename 13
+#define Link 14
+#define ReadDirectory 15
+#define ReadDirectoryPlus 16
+#define FileSystemStatus 17
+#define FileSystemInfo 18
+#define PathConfigure 19
+#define Commit 20
+#define Mount 21
+#define UnMount 22
+#define Newfs 23
+#define FsCheck 24
+#define FsSync 25
+#define SimReadWrite 26
+#define SetFileSystemStatus 27
+#define BlockRead 28
+#define BlockWrite 29
+#define NvramIoctl 30
+#define FsSyncWait 31
+#define ClearArchiveBit 32
+#define SetAcl 33
+#define GetAcl 34
+#define AssignAcl 35
+#define FaultInsertion 36 /* Fault Insertion Command */
+#define CrazyCache 37 /* Crazycache */
+
+#define MAX_FSACOMMAND_NUM 38
+
+
+/*
+ * Define the status returns. These are very Unix-like, although
+ * most are not in fact used.
+ */
+
+#define ST_OK 0
+#define ST_PERM 1
+#define ST_NOENT 2
+#define ST_IO 5
+#define ST_NXIO 6
+#define ST_E2BIG 7
+#define ST_ACCES 13
+#define ST_EXIST 17
+#define ST_XDEV 18
+#define ST_NODEV 19
+#define ST_NOTDIR 20
+#define ST_ISDIR 21
+#define ST_INVAL 22
+#define ST_FBIG 27
+#define ST_NOSPC 28
+#define ST_ROFS 30
+#define ST_MLINK 31
+#define ST_WOULDBLOCK 35
+#define ST_NAMETOOLONG 63
+#define ST_NOTEMPTY 66
+#define ST_DQUOT 69
+#define ST_STALE 70
+#define ST_REMOTE 71
+#define ST_BADHANDLE 10001
+#define ST_NOT_SYNC 10002
+#define ST_BAD_COOKIE 10003
+#define ST_NOTSUPP 10004
+#define ST_TOOSMALL 10005
+#define ST_SERVERFAULT 10006
+#define ST_BADTYPE 10007
+#define ST_JUKEBOX 10008
+#define ST_NOTMOUNTED 10009
+#define ST_MAINTMODE 10010
+#define ST_STALEACL 10011
+
+/*
+ * On writes, how the client wants the data written.
+ */
+
+#define CACHE_CSTABLE 1
+#define CACHE_UNSTABLE 2
+
+/*
+ * Lets the client know at which level the data was committed on
+ * a write request.
+ */
+
+#define CMFILE_SYNCH_NVRAM 1
+#define CMDATA_SYNCH_NVRAM 2
+#define CMFILE_SYNCH 3
+#define CMDATA_SYNCH 4
+#define CMUNSTABLE 5
+
+struct aac_read
+{
+ u32 command;
+ u32 cid;
+ u32 block;
+ u32 count;
+ struct sgmap sg; // Must be last in struct because it is variable
+};
+
+struct aac_read64
+{
+ u32 command;
+ u16 cid;
+ u16 sector_count;
+ u32 block;
+ u16 pad;
+ u16 flags;
+ struct sgmap64 sg; // Must be last in struct because it is variable
+};
+
+struct aac_read_reply
+{
+ u32 status;
+ u32 count;
+};
+
+struct aac_write
+{
+ u32 command;
+ u32 cid;
+ u32 block;
+ u32 count;
+ u32 stable; // Not used
+ struct sgmap sg; // Must be last in struct because it is variable
+};
+
+struct aac_write64
+{
+ u32 command;
+ u16 cid;
+ u16 sector_count;
+ u32 block;
+ u16 pad;
+ u16 flags;
+ struct sgmap64 sg; // Must be last in struct because it is variable
+};
+struct aac_write_reply
+{
+ u32 status;
+ u32 count;
+ u32 committed;
+};
+
+struct aac_srb
+{
+ u32 function;
+ u32 channel;
+ u32 target;
+ u32 lun;
+ u32 timeout;
+ u32 flags;
+ u32 count; // Data xfer size
+ u32 retry_limit;
+ u32 cdb_size;
+ u8 cdb[16];
+ struct sgmap sg;
+};
+
+
+
+#define AAC_SENSE_BUFFERSIZE 30
+
+struct aac_srb_reply
+{
+ u32 status;
+ u32 srb_status;
+ u32 scsi_status;
+ u32 data_xfer_length;
+ u32 sense_data_size;
+ u8 sense_data[AAC_SENSE_BUFFERSIZE]; // Can this be SCSI_SENSE_BUFFERSIZE
+};
+/*
+ * SRB Flags
+ */
+#define SRB_NoDataXfer 0x0000
+#define SRB_DisableDisconnect 0x0004
+#define SRB_DisableSynchTransfer 0x0008
+#define SRB_BypassFrozenQueue 0x0010
+#define SRB_DisableAutosense 0x0020
+#define SRB_DataIn 0x0040
+#define SRB_DataOut 0x0080
+
+/*
+ * SRB Functions - set in aac_srb->function
+ */
+#define SRBF_ExecuteScsi 0x0000
+#define SRBF_ClaimDevice 0x0001
+#define SRBF_IO_Control 0x0002
+#define SRBF_ReceiveEvent 0x0003
+#define SRBF_ReleaseQueue 0x0004
+#define SRBF_AttachDevice 0x0005
+#define SRBF_ReleaseDevice 0x0006
+#define SRBF_Shutdown 0x0007
+#define SRBF_Flush 0x0008
+#define SRBF_AbortCommand 0x0010
+#define SRBF_ReleaseRecovery 0x0011
+#define SRBF_ResetBus 0x0012
+#define SRBF_ResetDevice 0x0013
+#define SRBF_TerminateIO 0x0014
+#define SRBF_FlushQueue 0x0015
+#define SRBF_RemoveDevice 0x0016
+#define SRBF_DomainValidation 0x0017
+
+/*
+ * SRB SCSI Status - set in aac_srb->scsi_status
+ */
+#define SRB_STATUS_PENDING 0x00
+#define SRB_STATUS_SUCCESS 0x01
+#define SRB_STATUS_ABORTED 0x02
+#define SRB_STATUS_ABORT_FAILED 0x03
+#define SRB_STATUS_ERROR 0x04
+#define SRB_STATUS_BUSY 0x05
+#define SRB_STATUS_INVALID_REQUEST 0x06
+#define SRB_STATUS_INVALID_PATH_ID 0x07
+#define SRB_STATUS_NO_DEVICE 0x08
+#define SRB_STATUS_TIMEOUT 0x09
+#define SRB_STATUS_SELECTION_TIMEOUT 0x0A
+#define SRB_STATUS_COMMAND_TIMEOUT 0x0B
+#define SRB_STATUS_MESSAGE_REJECTED 0x0D
+#define SRB_STATUS_BUS_RESET 0x0E
+#define SRB_STATUS_PARITY_ERROR 0x0F
+#define SRB_STATUS_REQUEST_SENSE_FAILED 0x10
+#define SRB_STATUS_NO_HBA 0x11
+#define SRB_STATUS_DATA_OVERRUN 0x12
+#define SRB_STATUS_UNEXPECTED_BUS_FREE 0x13
+#define SRB_STATUS_PHASE_SEQUENCE_FAILURE 0x14
+#define SRB_STATUS_BAD_SRB_BLOCK_LENGTH 0x15
+#define SRB_STATUS_REQUEST_FLUSHED 0x16
+#define SRB_STATUS_DELAYED_RETRY 0x17
+#define SRB_STATUS_INVALID_LUN 0x20
+#define SRB_STATUS_INVALID_TARGET_ID 0x21
+#define SRB_STATUS_BAD_FUNCTION 0x22
+#define SRB_STATUS_ERROR_RECOVERY 0x23
+#define SRB_STATUS_NOT_STARTED 0x24
+#define SRB_STATUS_NOT_IN_USE 0x30
+#define SRB_STATUS_FORCE_ABORT 0x31
+#define SRB_STATUS_DOMAIN_VALIDATION_FAIL 0x32
+
+/*
+ * Object-Server / Volume-Manager Dispatch Classes
+ */
+
+#define VM_Null 0
+#define VM_NameServe 1
+#define VM_ContainerConfig 2
+#define VM_Ioctl 3
+#define VM_FilesystemIoctl 4
+#define VM_CloseAll 5
+#define VM_CtBlockRead 6
+#define VM_CtBlockWrite 7
+#define VM_SliceBlockRead 8 /* raw access to configured "storage objects" */
+#define VM_SliceBlockWrite 9
+#define VM_DriveBlockRead 10 /* raw access to physical devices */
+#define VM_DriveBlockWrite 11
+#define VM_EnclosureMgt 12 /* enclosure management */
+#define VM_Unused 13 /* used to be diskset management */
+#define VM_CtBlockVerify 14
+#define VM_CtPerf 15 /* performance test */
+#define VM_CtBlockRead64 16
+#define VM_CtBlockWrite64 17
+#define VM_CtBlockVerify64 18
+#define VM_CtHostRead64 19
+#define VM_CtHostWrite64 20
+
+#define MAX_VMCOMMAND_NUM 21 /* used for sizing stats array - leave last */
+
+/*
+ * Descriptive information (eg, vital stats)
+ * that a content manager might report. The
+ * FileArray filesystem component is one example
+ * of a content manager. Raw mode might be
+ * another.
+ */
+
+struct aac_fsinfo {
+ u32 fsTotalSize; /* Consumed by fs, incl. metadata */
+ u32 fsBlockSize;
+ u32 fsFragSize;
+ u32 fsMaxExtendSize;
+ u32 fsSpaceUnits;
+ u32 fsMaxNumFiles;
+ u32 fsNumFreeFiles;
+ u32 fsInodeDensity;
+}; /* valid iff ObjType == FT_FILESYS && !(ContentState & FSCS_NOTCLEAN) */
+
+union aac_contentinfo {
+ struct aac_fsinfo filesys; /* valid iff ObjType == FT_FILESYS && !(ContentState & FSCS_NOTCLEAN) */
+};
+
+/*
+ * Query for "mountable" objects, ie, objects that are typically
+ * associated with a drive letter on the client (host) side.
+ */
+
+struct aac_mntent {
+ u32 oid;
+ u8 name[16]; // if applicable
+ struct creation_info create_info; // if applicable
+ u32 capacity;
+ u32 vol; // substrate structure
+ u32 obj; // FT_FILESYS, FT_DATABASE, etc.
+ u32 state; // unready for mounting, readonly, etc.
+ union aac_contentinfo fileinfo; // Info specific to content manager (eg, filesystem)
+ u32 altoid; // != oid <==> snapshot or broken mirror exists
+};
+
+#define FSCS_READONLY 0x0002 /* possible result of broken mirror */
+
+struct aac_query_mount {
+ u32 command;
+ u32 type;
+ u32 count;
+};
+
+struct aac_mount {
+ u32 status;
+ u32 type; /* should be same as that requested */
+ u32 count;
+ struct aac_mntent mnt[1];
+};
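+
+/*
+ * Note: mnt[1] is the old C idiom for a variable-length trailing array;
+ * the reply presumably carries `count` aac_mntent entries in place of the
+ * single declared element.
+ */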
+
+/*
+ * The following command is sent to shut down each container.
+ */
+
+struct aac_close {
+ u32 command;
+ u32 cid;
+};
+
+struct aac_query_disk
+{
+ s32 cnum;
+ s32 bus;
+ s32 target;
+ s32 lun;
+ u32 valid;
+ u32 locked;
+ u32 deleted;
+ s32 instance;
+ s8 name[10];
+ u32 unmapped;
+};
+
+struct aac_delete_disk {
+ u32 disknum;
+ u32 cnum;
+};
+
+struct fib_ioctl
+{
+ char *fibctx;
+ int wait;
+ char *fib;
+};
+
+struct revision
+{
+ u32 compat;
+ u32 version;
+ u32 build;
+};
+
+/*
+ * Ugly, non-Linux-like ioctl coding kept for backward compatibility.
+ */
+
+#define CTL_CODE(function, method) ( \
+ (4<< 16) | ((function) << 2) | (method) \
+)
+
+/*
+ * Define the method codes for how buffers are passed for I/O and FS
+ * controls
+ */
+
+#define METHOD_BUFFERED 0
+#define METHOD_NEITHER 3
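+
+/*
+ * Worked example: FSACTL_SENDFIB below expands to
+ * CTL_CODE(2050, METHOD_BUFFERED) = (4 << 16) | (2050 << 2) | 0
+ *                                 = 0x40000 | 0x2008 = 0x42008.
+ */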
+
+/*
+ * Filesystem ioctls
+ */
+
+#define FSACTL_SENDFIB CTL_CODE(2050, METHOD_BUFFERED)
+#define FSACTL_SEND_RAW_SRB CTL_CODE(2067, METHOD_BUFFERED)
+#define FSACTL_DELETE_DISK 0x163
+#define FSACTL_QUERY_DISK 0x173
+#define FSACTL_OPEN_GET_ADAPTER_FIB CTL_CODE(2100, METHOD_BUFFERED)
+#define FSACTL_GET_NEXT_ADAPTER_FIB CTL_CODE(2101, METHOD_BUFFERED)
+#define FSACTL_CLOSE_GET_ADAPTER_FIB CTL_CODE(2102, METHOD_BUFFERED)
+#define FSACTL_MINIPORT_REV_CHECK CTL_CODE(2107, METHOD_BUFFERED)
+#define FSACTL_GET_PCI_INFO CTL_CODE(2119, METHOD_BUFFERED)
+#define FSACTL_FORCE_DELETE_DISK CTL_CODE(2120, METHOD_NEITHER)
+
+
+struct aac_common
+{
+ /*
+ * If this value is set to 1 then interrupt moderation will occur
+	 * in the base communication support.
+ */
+ u32 irq_mod;
+ u32 peak_fibs;
+ u32 zero_fibs;
+ u32 fib_timeouts;
+ /*
+ * Statistical counters in debug mode
+ */
+#ifdef DBG
+ u32 FibsSent;
+ u32 FibRecved;
+ u32 NoResponseSent;
+ u32 NoResponseRecved;
+ u32 AsyncSent;
+ u32 AsyncRecved;
+ u32 NormalSent;
+ u32 NormalRecved;
+#endif
+};
+
+extern struct aac_common aac_config;
+
+
+/*
+ * The following macro is used when sending and receiving FIBs. It is
+ * only used for debugging.
+ */
+
+#if DBG
+#define FIB_COUNTER_INCREMENT(counter) (counter)++
+#else
+#define FIB_COUNTER_INCREMENT(counter)
+#endif
+
+/*
+ * Adapter direct commands
+ * Monitor/Kernel API
+ */
+
+#define BREAKPOINT_REQUEST cpu_to_le32(0x00000004)
+#define INIT_STRUCT_BASE_ADDRESS cpu_to_le32(0x00000005)
+#define READ_PERMANENT_PARAMETERS cpu_to_le32(0x0000000a)
+#define WRITE_PERMANENT_PARAMETERS cpu_to_le32(0x0000000b)
+#define HOST_CRASHING cpu_to_le32(0x0000000d)
+#define SEND_SYNCHRONOUS_FIB cpu_to_le32(0x0000000c)
+#define GET_ADAPTER_PROPERTIES cpu_to_le32(0x00000019)
+#define RE_INIT_ADAPTER cpu_to_le32(0x000000ee)
+
+/*
+ * Adapter Status Register
+ *
+ * The Phase Status mailbox is 32 bits:
+ * <31:16> = Phase Status
+ * <15:0>  = Phase
+ *
+ * The adapter reports its present state through the phase. Only
+ * a single phase should ever be set. Each phase can have multiple
+ * phase status bits to provide more detailed information about the
+ * state of the board. Care should be taken to ensure that any phase
+ * status bits that are set when changing the phase are also valid
+ * for the new phase or are cleared out. Adapter software (monitor,
+ * iflash, kernel) is responsible for properly maintaining the phase
+ * status mailbox when it is running.
+ *
+ * MONKER_API Phases
+ *
+ * Phases are bit oriented. It is NOT valid to have multiple bits set
+ */
+
+#define SELF_TEST_FAILED cpu_to_le32(0x00000004)
+#define KERNEL_UP_AND_RUNNING cpu_to_le32(0x00000080)
+#define KERNEL_PANIC cpu_to_le32(0x00000100)
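+
+/*
+ * Example reading: a mailbox value of 0x00000080 carries the phase
+ * KERNEL_UP_AND_RUNNING in <15:0> with no phase-status bits set in <31:16>.
+ */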
+
+/*
+ * Doorbell bit defines
+ */
+
+#define DoorBellPrintfDone cpu_to_le32(1<<5) // Host -> Adapter
+#define DoorBellAdapterNormCmdReady cpu_to_le32(1<<1) // Adapter -> Host
+#define DoorBellAdapterNormRespReady cpu_to_le32(1<<2) // Adapter -> Host
+#define DoorBellAdapterNormCmdNotFull cpu_to_le32(1<<3) // Adapter -> Host
+#define DoorBellAdapterNormRespNotFull cpu_to_le32(1<<4) // Adapter -> Host
+#define DoorBellPrintfReady cpu_to_le32(1<<5) // Adapter -> Host
+
+/*
+ * For FIB communication, we need all of the following things
+ * to send back to the user.
+ */
+
+#define AifCmdEventNotify 1 /* Notify of event */
+#define AifCmdJobProgress 2 /* Progress report */
+#define AifCmdAPIReport 3 /* Report from other user of API */
+#define AifCmdDriverNotify 4 /* Notify host driver of event */
+#define AifReqJobList 100 /* Gets back complete job list */
+#define AifReqJobsForCtr 101 /* Gets back jobs for specific container */
+#define AifReqJobsForScsi 102 /* Gets back jobs for specific SCSI device */
+#define AifReqJobReport 103 /* Gets back a specific job report or list of them */
+#define AifReqTerminateJob 104 /* Terminates job */
+#define AifReqSuspendJob 105 /* Suspends a job */
+#define AifReqResumeJob 106 /* Resumes a job */
+#define AifReqSendAPIReport 107 /* API generic report requests */
+#define AifReqAPIJobStart 108 /* Start a job from the API */
+#define AifReqAPIJobUpdate 109 /* Update a job report from the API */
+#define AifReqAPIJobFinish 110 /* Finish a job from the API */
+
+/*
+ * Adapter Initiated FIB command structures. Start with the adapter
+ * initiated FIBs that really come from the adapter, and get responded
+ * to by the host.
+ */
+
+struct aac_aifcmd {
+ u32 command; /* Tell host what type of notify this is */
+ u32 seqnum; /* To allow ordering of reports (if necessary) */
+ u8 data[1]; /* Undefined length (from kernel viewpoint) */
+};
+
+static inline u32 fib2addr(struct hw_fib *hw)
+{
+ return (u32)hw;
+}
+
+static inline struct hw_fib *addr2fib(u32 addr)
+{
+ return (struct hw_fib *)addr;
+}
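+
+/*
+ * Note: fib2addr()/addr2fib() simply cast between a host virtual address
+ * and the 32-bit FIB address handed to the adapter, so they rely on kernel
+ * virtual addresses fitting in 32 bits (compare the BITS_PER_LONG >= 64
+ * fib_base_va handling in struct aac_dev above).
+ */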
+
+const char *aac_driverinfo(struct Scsi_Host *);
+struct fib *fib_alloc(struct aac_dev *dev);
+int fib_setup(struct aac_dev *dev);
+void fib_map_free(struct aac_dev *dev);
+void fib_free(struct fib * context);
+void fib_init(struct fib * context);
+void fib_dealloc(struct fib * context);
+void aac_printf(struct aac_dev *dev, u32 val);
+int fib_send(u16 command, struct fib * context, unsigned long size, int priority, int wait, int reply, fib_callback callback, void *ctxt);
+int aac_consumer_get(struct aac_dev * dev, struct aac_queue * q, struct aac_entry **entry);
+int aac_consumer_avail(struct aac_dev * dev, struct aac_queue * q);
+void aac_consumer_free(struct aac_dev * dev, struct aac_queue * q, u32 qnum);
+int fib_complete(struct fib * context);
+#define fib_data(fibctx) ((void *)(fibctx)->fib->data)
+int aac_detach(struct aac_dev *dev);
+struct aac_dev *aac_init_adapter(struct aac_dev *dev);
+int aac_get_containers(struct aac_dev *dev);
+int aac_scsi_cmd(Scsi_Cmnd *scsi_cmnd_ptr);
+int aac_dev_ioctl(struct aac_dev *dev, int cmd, void *arg);
+int aac_do_ioctl(struct aac_dev * dev, int cmd, void *arg);
+int aac_rx_init(struct aac_dev *dev, unsigned long devNumber);
+int aac_sa_init(struct aac_dev *dev, unsigned long devNumber);
+unsigned int aac_response_normal(struct aac_queue * q);
+unsigned int aac_command_normal(struct aac_queue * q);
+#ifdef TRY_TASKLET
+extern struct tasklet_struct aac_command_tasklet;
+int aac_command_thread(unsigned long data);
+#else
+int aac_command_thread(struct aac_dev * dev);
+#endif
+int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx);
+int fib_adapter_complete(struct fib * fibptr, unsigned short size);
+struct aac_driver_ident* aac_get_driver_ident(int devtype);
+int aac_get_adapter_info(struct aac_dev* dev);
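+
+/*
+ * Typical synchronous FIB lifecycle, sketched from aac_send_shutdown() in
+ * comminit.c (error handling omitted):
+ *
+ *	struct fib *fibptr = fib_alloc(dev);
+ *	fib_init(fibptr);
+ *	cmd = fib_data(fibptr);		// fill in command-specific data
+ *	fib_send(command, fibptr, size, FsaNormal, 1, 1, NULL, NULL);
+ *	fib_complete(fibptr);
+ *	fib_free(fibptr);
+ */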
diff --git a/xen/drivers/scsi/aacraid/commctrl.c b/xen/drivers/scsi/aacraid/commctrl.c
new file mode 100644
index 0000000000..15b6a62c6f
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/commctrl.c
@@ -0,0 +1,438 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * commctrl.c
+ *
+ * Abstract: Contains all routines for control of the AFA comm layer
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+#include <xeno/blk.h>
+/* #include <asm/semaphore.h> */
+#include <asm/uaccess.h>
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+/**
+ * ioctl_send_fib - send a FIB from userspace
+ * @dev: adapter being processed
+ * @arg: arguments to the ioctl call
+ *
+ * This routine sends a fib to the adapter on behalf of a user level
+ * program.
+ */
+
+static int ioctl_send_fib(struct aac_dev * dev, void *arg)
+{
+ struct hw_fib * kfib;
+ struct fib *fibptr;
+
+ fibptr = fib_alloc(dev);
+ if(fibptr == NULL)
+ return -ENOMEM;
+
+ kfib = fibptr->fib;
+ /*
+ * First copy in the header so that we can check the size field.
+ */
+ if (copy_from_user((void *)kfib, arg, sizeof(struct aac_fibhdr))) {
+ fib_free(fibptr);
+ return -EFAULT;
+ }
+ /*
+ * Since we copy based on the fib header size, make sure that we
+ * will not overrun the buffer when we copy the memory. Return
+ * an error if we would.
+ */
+ if(le32_to_cpu(kfib->header.Size) > sizeof(struct hw_fib) - sizeof(struct aac_fibhdr)) {
+ fib_free(fibptr);
+ return -EINVAL;
+ }
+
+ if (copy_from_user((void *) kfib, arg, le32_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr))) {
+ fib_free(fibptr);
+ return -EFAULT;
+ }
+
+ if (kfib->header.Command == cpu_to_le32(TakeABreakPt)) {
+ aac_adapter_interrupt(dev);
+ /*
+ * Since we didn't really send a fib, zero out the state to allow
+ * cleanup code not to assert.
+ */
+ kfib->header.XferState = 0;
+ } else {
+ if (fib_send(kfib->header.Command, fibptr, le32_to_cpu(kfib->header.Size) , FsaNormal,
+ 1, 1, NULL, NULL) != 0)
+ {
+ fib_free(fibptr);
+ return -EINVAL;
+ }
+ if (fib_complete(fibptr) != 0) {
+ fib_free(fibptr);
+ return -EINVAL;
+ }
+ }
+ /*
+ * Make sure that the size returned by the adapter (which includes
+ * the header) is less than or equal to the size of a fib, so we
+ * don't corrupt application data. Then copy that size to the user
+ * buffer. (Don't try to add the header information again, since it
+ * was already included by the adapter.)
+ */
+
+ if (copy_to_user(arg, (void *)kfib, kfib->header.Size)) {
+ fib_free(fibptr);
+ return -EFAULT;
+ }
+ fib_free(fibptr);
+ return 0;
+}
+
+/**
+ *	open_getadapter_fib	-	open an adapter fib context
+ *	@dev: adapter to use
+ *	@arg: ioctl argument
+ *
+ *	This routine allocates a new AdapterFibContext, initializes it, and
+ *	adds it to the adapter's list of contexts registered to receive FIBs.
+ */
+
+static int open_getadapter_fib(struct aac_dev * dev, void *arg)
+{
+ struct aac_fib_context * fibctx;
+ int status;
+ unsigned long flags;
+
+ fibctx = kmalloc(sizeof(struct aac_fib_context), GFP_KERNEL);
+ if (fibctx == NULL) {
+ status = -ENOMEM;
+ } else {
+ fibctx->type = FSAFS_NTC_GET_ADAPTER_FIB_CONTEXT;
+ fibctx->size = sizeof(struct aac_fib_context);
+#if 0
+ /*
+ * Initialize the mutex used to wait for the next AIF.
+ */
+ init_MUTEX_LOCKED(&fibctx->wait_sem);
+#endif
+ fibctx->wait = 0;
+ /*
+ * Initialize the fibs and set the count of fibs on
+ * the list to 0.
+ */
+ fibctx->count = 0;
+ INIT_LIST_HEAD(&fibctx->fibs);
+ fibctx->jiffies = jiffies/HZ;
+ /*
+ * Now add this context onto the adapter's
+ * AdapterFibContext list.
+ */
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ list_add_tail(&fibctx->next, &dev->fib_list);
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ if (copy_to_user(arg, &fibctx, sizeof(struct aac_fib_context *))) {
+ status = -EFAULT;
+ } else {
+ status = 0;
+ }
+ }
+ return status;
+}
+
+/**
+ * next_getadapter_fib - get the next fib
+ * @dev: adapter to use
+ * @arg: ioctl argument
+ *
+ * This routine will get the next Fib, if available, from the AdapterFibContext
+ * passed in from the user.
+ */
+
+static int next_getadapter_fib(struct aac_dev * dev, void *arg)
+{
+ struct fib_ioctl f;
+ struct aac_fib_context *fibctx, *aifcp;
+ struct hw_fib * fib;
+ int status;
+ struct list_head * entry;
+ int found;
+ unsigned long flags;
+
+ if(copy_from_user((void *)&f, arg, sizeof(struct fib_ioctl)))
+ return -EFAULT;
+ /*
+ * Extract the AdapterFibContext from the Input parameters.
+ */
+ fibctx = (struct aac_fib_context *) f.fibctx;
+
+ /*
+ * Verify that the HANDLE passed in was a valid AdapterFibContext
+ *
+ * Search the list of AdapterFibContext addresses on the adapter
+ * to be sure this is a valid address
+ */
+ found = 0;
+ entry = dev->fib_list.next;
+
+ while(entry != &dev->fib_list) {
+ aifcp = list_entry(entry, struct aac_fib_context, next);
+ if(fibctx == aifcp) { /* We found a winner */
+ found = 1;
+ break;
+ }
+ entry = entry->next;
+ }
+ if (found == 0)
+ return -EINVAL;
+
+ if((fibctx->type != FSAFS_NTC_GET_ADAPTER_FIB_CONTEXT) ||
+ (fibctx->size != sizeof(struct aac_fib_context)))
+ return -EINVAL;
+ status = 0;
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ /*
+ * If there are no fibs to send back, then either wait or return
+ * -EAGAIN
+ */
+return_fib:
+ if (!list_empty(&fibctx->fibs)) {
+ struct list_head * entry;
+ /*
+ * Pull the next fib from the fibs
+ */
+ entry = fibctx->fibs.next;
+ list_del(entry);
+
+ fib = list_entry(entry, struct hw_fib, header.FibLinks);
+ fibctx->count--;
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ if (copy_to_user(f.fib, fib, sizeof(struct hw_fib))) {
+ kfree(fib);
+ return -EFAULT;
+ }
+ /*
+ * Free the space occupied by this copy of the fib.
+ */
+ kfree(fib);
+ status = 0;
+ fibctx->jiffies = jiffies/HZ;
+ } else {
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ if (f.wait) {
+#if 0
+ if(down_interruptible(&fibctx->wait_sem) < 0) {
+ status = -EINTR;
+ } else {
+#else
+ {
+#endif
+ /* Lock again and retry */
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ goto return_fib;
+ }
+ } else {
+ status = -EAGAIN;
+ }
+ }
+ return status;
+}
+
+int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context * fibctx)
+{
+ struct hw_fib *fib;
+
+ /*
+ * First free any FIBs that have not been consumed.
+ */
+ while (!list_empty(&fibctx->fibs)) {
+ struct list_head * entry;
+ /*
+ * Pull the next fib from the fibs
+ */
+ entry = fibctx->fibs.next;
+ list_del(entry);
+ fib = list_entry(entry, struct hw_fib, header.FibLinks);
+ fibctx->count--;
+ /*
+ * Free the space occupied by this copy of the fib.
+ */
+ kfree(fib);
+ }
+ /*
+ * Remove the Context from the AdapterFibContext List
+ */
+ list_del(&fibctx->next);
+ /*
+ * Invalidate context
+ */
+ fibctx->type = 0;
+ /*
+ * Free the space occupied by the Context
+ */
+ kfree(fibctx);
+ return 0;
+}
+
+/**
+ * close_getadapter_fib - close down user fib context
+ * @dev: adapter
+ * @arg: ioctl arguments
+ *
+ * This routine will close down the fibctx passed in from the user.
+ */
+
+static int close_getadapter_fib(struct aac_dev * dev, void *arg)
+{
+ struct aac_fib_context *fibctx, *aifcp;
+ int status;
+ unsigned long flags;
+ struct list_head * entry;
+ int found;
+
+ /*
+ * Extract the fibctx from the input parameters
+ */
+ fibctx = arg;
+
+ /*
+ * Verify that the HANDLE passed in was a valid AdapterFibContext
+ *
+ * Search the list of AdapterFibContext addresses on the adapter
+ * to be sure this is a valid address
+ */
+
+ found = 0;
+ entry = dev->fib_list.next;
+
+ while(entry != &dev->fib_list) {
+ aifcp = list_entry(entry, struct aac_fib_context, next);
+ if(fibctx == aifcp) { /* We found a winner */
+ found = 1;
+ break;
+ }
+ entry = entry->next;
+ }
+
+ if(found == 0)
+ return 0; /* Already gone */
+
+ if((fibctx->type != FSAFS_NTC_GET_ADAPTER_FIB_CONTEXT) ||
+ (fibctx->size != sizeof(struct aac_fib_context)))
+ return -EINVAL;
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ status = aac_close_fib_context(dev, fibctx);
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ return status;
+}
+
+/**
+ * check_revision - check the firmware revision
+ * @dev: adapter
+ * @arg: ioctl arguments
+ *
+ * This routine returns the firmware version.
+ * Under Linux, there have been no version incompatibilities, so this is simple!
+ */
+
+static int check_revision(struct aac_dev *dev, void *arg)
+{
+ struct revision response;
+
+ response.compat = 1;
+ response.version = dev->adapter_info.kernelrev;
+ response.build = dev->adapter_info.kernelbuild;
+
+ if (copy_to_user(arg, &response, sizeof(response)))
+ return -EFAULT;
+ return 0;
+}
+
+
+struct aac_pci_info {
+ u32 bus;
+ u32 slot;
+};
+
+
+int aac_get_pci_info(struct aac_dev* dev, void* arg)
+{
+ struct aac_pci_info pci_info;
+
+ pci_info.bus = dev->pdev->bus->number;
+ pci_info.slot = PCI_SLOT(dev->pdev->devfn);
+
+ if(copy_to_user( arg, (void*)&pci_info, sizeof(struct aac_pci_info)))
+ return -EFAULT;
+ return 0;
+ }
+
+
+int aac_do_ioctl(struct aac_dev * dev, int cmd, void *arg)
+{
+ int status;
+
+ /*
+ * HBA gets first crack
+ */
+
+ status = aac_dev_ioctl(dev, cmd, arg);
+ if(status != -ENOTTY)
+ return status;
+
+ switch (cmd) {
+ case FSACTL_MINIPORT_REV_CHECK:
+ status = check_revision(dev, arg);
+ break;
+ case FSACTL_SENDFIB:
+ status = ioctl_send_fib(dev, arg);
+ break;
+ case FSACTL_OPEN_GET_ADAPTER_FIB:
+ status = open_getadapter_fib(dev, arg);
+ break;
+ case FSACTL_GET_NEXT_ADAPTER_FIB:
+ status = next_getadapter_fib(dev, arg);
+ break;
+ case FSACTL_CLOSE_GET_ADAPTER_FIB:
+ status = close_getadapter_fib(dev, arg);
+ break;
+ case FSACTL_GET_PCI_INFO:
+ status = aac_get_pci_info(dev,arg);
+ break;
+ default:
+ status = -ENOTTY;
+ break;
+ }
+ return status;
+}
+
diff --git a/xen/drivers/scsi/aacraid/comminit.c b/xen/drivers/scsi/aacraid/comminit.c
new file mode 100644
index 0000000000..29a3dba28e
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/comminit.c
@@ -0,0 +1,350 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * comminit.c
+ *
+ * Abstract: This supports the initialization of the host adapter communication interface.
+ *    This is a platform-dependent module for the PCI cyclone board.
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+#include <xeno/spinlock.h>
+/* #include <xeno/slab.h> */
+#include <xeno/blk.h>
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+struct aac_common aac_config;
+
+static struct aac_dev *devices;
+
+static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long commsize, unsigned long commalign)
+{
+ unsigned char *base;
+ unsigned long size, align;
+ unsigned long fibsize = 4096;
+ unsigned long printfbufsiz = 256;
+ struct aac_init *init;
+ dma_addr_t phys;
+
+ /* FIXME: Adaptec add 128 bytes to this value - WHY ?? */
+ size = fibsize + sizeof(struct aac_init) + commsize + commalign + printfbufsiz;
+
+ base = pci_alloc_consistent(dev->pdev, size, &phys);
+ if(base == NULL)
+ {
+ printk(KERN_ERR "aacraid: unable to create mapping.\n");
+ return 0;
+ }
+ dev->comm_addr = (void *)base;
+ dev->comm_phys = phys;
+ dev->comm_size = size;
+
+ dev->init = (struct aac_init *)(base + fibsize);
+ dev->init_pa = phys + fibsize;
+
+ /*
+ * Cache the upper bits of the virtual mapping for 64bit boxes
+ * FIXME: this crap should be rewritten
+ */
+#if BITS_PER_LONG >= 64
+ dev->fib_base_va = ((ulong)base & 0xffffffff00000000);
+#endif
+
+ init = dev->init;
+
+ init->InitStructRevision = cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION);
+ init->MiniPortRevision = cpu_to_le32(Sa_MINIPORT_REVISION);
+ init->fsrev = cpu_to_le32(dev->fsrev);
+
+ /*
+ * Adapter Fibs are the first thing allocated so that they
+ * start page aligned
+ */
+ init->AdapterFibsVirtualAddress = cpu_to_le32((u32)base);
+ init->AdapterFibsPhysicalAddress = cpu_to_le32(phys);
+ init->AdapterFibsSize = cpu_to_le32(fibsize);
+ init->AdapterFibAlign = cpu_to_le32(sizeof(struct hw_fib));
+
+ /*
+ * Increment the base address by the amount already used
+ */
+ base = base + fibsize + sizeof(struct aac_init);
+ phys = phys + fibsize + sizeof(struct aac_init);
+ /*
+ * Align the beginning of Headers to commalign
+ */
+ align = (commalign - ((unsigned long)(base) & (commalign - 1)));
+ base = base + align;
+ phys = phys + align;
+ /*
+ * Fill in addresses of the Comm Area Headers and Queues
+ */
+ *commaddr = (unsigned long *)base;
+ init->CommHeaderAddress = cpu_to_le32(phys);
+ /*
+ * Increment the base address by the size of the CommArea
+ */
+ base = base + commsize;
+ phys = phys + commsize;
+ /*
+ * Place the Printf buffer area after the Fast I/O comm area.
+ */
+ dev->printfbuf = (void *)base;
+ init->printfbuf = cpu_to_le32(phys);
+ init->printfbufsiz = cpu_to_le32(printfbufsiz);
+ memset(base, 0, printfbufsiz);
+ return 1;
+}
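+
+/*
+ * Resulting layout of the consistent allocation (a sketch):
+ *
+ *	base/phys -> +------------------------------+
+ *	             | adapter fibs (fibsize)       |
+ *	             +------------------------------+
+ *	             | struct aac_init              |
+ *	             +------------------------------+
+ *	             | padding up to commalign      |
+ *	             +------------------------------+
+ *	             | comm area (commsize)         |
+ *	             +------------------------------+
+ *	             | printf buffer (printfbufsiz) |
+ *	             +------------------------------+
+ */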
+
+static void aac_queue_init(struct aac_dev * dev, struct aac_queue * q, u32 *mem, int qsize)
+{
+ q->numpending = 0;
+ q->dev = dev;
+ INIT_LIST_HEAD(&q->pendingq);
+#if 0
+ init_waitqueue_head(&q->cmdready);
+#endif
+ INIT_LIST_HEAD(&q->cmdq);
+#if 0
+ init_waitqueue_head(&q->qfull);
+#endif
+ spin_lock_init(&q->lockdata);
+ q->lock = &q->lockdata;
+ q->headers.producer = mem;
+ q->headers.consumer = mem+1;
+ *q->headers.producer = cpu_to_le32(qsize);
+ *q->headers.consumer = cpu_to_le32(qsize);
+ q->entries = qsize;
+}
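+
+/*
+ * Note: both indices start at qsize; aac_get_entry() in commsup.c treats
+ * an index >= the queue size as a wrap back to slot 0, so each queue
+ * begins logically empty (producer == consumer).
+ */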
+
+/**
+ * aac_send_shutdown - shutdown an adapter
+ * @dev: Adapter to shutdown
+ *
+ * This routine will send a VM_CloseAll (shutdown) request to the adapter.
+ */
+
+static int aac_send_shutdown(struct aac_dev * dev)
+{
+ struct fib * fibctx;
+ struct aac_close *cmd;
+ int status;
+
+ fibctx = fib_alloc(dev);
+ fib_init(fibctx);
+
+ cmd = (struct aac_close *) fib_data(fibctx);
+
+ cmd->command = cpu_to_le32(VM_CloseAll);
+ cmd->cid = cpu_to_le32(0xffffffff);
+
+ status = fib_send(ContainerCommand,
+ fibctx,
+ sizeof(struct aac_close),
+ FsaNormal,
+ 1, 1,
+ NULL, NULL);
+
+ if (status == 0)
+ fib_complete(fibctx);
+ fib_free(fibctx);
+ return status;
+}
+
+/**
+ * aac_detach - detach adapter
+ * @detach: adapter to disconnect
+ *
+ * Disconnect and shutdown an AAC based adapter, freeing resources
+ * as we go.
+ */
+
+int aac_detach(struct aac_dev *detach)
+{
+ struct aac_dev **dev = &devices;
+
+ while(*dev)
+ {
+ if(*dev == detach)
+ {
+ *dev = detach->next;
+ aac_send_shutdown(detach);
+ fib_map_free(detach);
+ pci_free_consistent(detach->pdev, detach->comm_size, detach->comm_addr, detach->comm_phys);
+ kfree(detach->queues);
+ return 1;
+ }
+ dev=&((*dev)->next);
+ }
+ BUG();
+ return 0;
+}
+
+/**
+ * aac_comm_init - Initialise FSA data structures
+ * @dev: Adapter to initialise
+ *
+ * Initializes the data structures that are required for the FSA communication
+ * interface to operate.
+ * Returns
+ *	0 - if we were able to init the communication interface.
+ *	-ENOMEM - if the communication area could not be allocated.
+ */
+
+int aac_comm_init(struct aac_dev * dev)
+{
+ unsigned long hdrsize = (sizeof(u32) * NUMBER_OF_COMM_QUEUES) * 2;
+ unsigned long queuesize = sizeof(struct aac_entry) * TOTAL_QUEUE_ENTRIES;
+ u32 *headers;
+ struct aac_entry * queues;
+ unsigned long size;
+ struct aac_queue_block * comm = dev->queues;
+
+ /*
+	 * Initialize the spinlock used to protect the adapter's fib
+	 * structures (the fib pool and the adapter fib context list).
+ */
+ spin_lock_init(&dev->fib_lock);
+
+ /*
+	 * Allocate the physically contiguous space for the communication
+ * queue headers.
+ */
+
+ size = hdrsize + queuesize;
+
+ if (!aac_alloc_comm(dev, (void * *)&headers, size, QUEUE_ALIGNMENT))
+ return -ENOMEM;
+
+ queues = (struct aac_entry *)((unsigned char *)headers + hdrsize);
+
+	/* Adapter to Host normal priority Command queue */
+ comm->queue[HostNormCmdQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[HostNormCmdQueue], headers, HOST_NORM_CMD_ENTRIES);
+ queues += HOST_NORM_CMD_ENTRIES;
+ headers += 2;
+
+ /* Adapter to Host high priority command queue */
+ comm->queue[HostHighCmdQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[HostHighCmdQueue], headers, HOST_HIGH_CMD_ENTRIES);
+
+ queues += HOST_HIGH_CMD_ENTRIES;
+ headers +=2;
+
+ /* Host to adapter normal priority command queue */
+ comm->queue[AdapNormCmdQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[AdapNormCmdQueue], headers, ADAP_NORM_CMD_ENTRIES);
+
+ queues += ADAP_NORM_CMD_ENTRIES;
+ headers += 2;
+
+ /* host to adapter high priority command queue */
+ comm->queue[AdapHighCmdQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[AdapHighCmdQueue], headers, ADAP_HIGH_CMD_ENTRIES);
+
+ queues += ADAP_HIGH_CMD_ENTRIES;
+ headers += 2;
+
+ /* adapter to host normal priority response queue */
+ comm->queue[HostNormRespQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[HostNormRespQueue], headers, HOST_NORM_RESP_ENTRIES);
+
+ queues += HOST_NORM_RESP_ENTRIES;
+ headers += 2;
+
+ /* adapter to host high priority response queue */
+ comm->queue[HostHighRespQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[HostHighRespQueue], headers, HOST_HIGH_RESP_ENTRIES);
+
+ queues += HOST_HIGH_RESP_ENTRIES;
+ headers += 2;
+
+ /* host to adapter normal priority response queue */
+ comm->queue[AdapNormRespQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[AdapNormRespQueue], headers, ADAP_NORM_RESP_ENTRIES);
+
+ queues += ADAP_NORM_RESP_ENTRIES;
+ headers += 2;
+
+ /* host to adapter high priority response queue */
+ comm->queue[AdapHighRespQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[AdapHighRespQueue], headers, ADAP_HIGH_RESP_ENTRIES);
+
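+	/*
+	 * The host-to-adapter queues presumably share locks with their
+	 * paired adapter-to-host queues: responses to commands sent on
+	 * AdapNormCmdQueue arrive on HostNormRespQueue, so both sides
+	 * serialize on the same lock.
+	 */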
+ comm->queue[AdapNormCmdQueue].lock = comm->queue[HostNormRespQueue].lock;
+ comm->queue[AdapHighCmdQueue].lock = comm->queue[HostHighRespQueue].lock;
+ comm->queue[AdapNormRespQueue].lock = comm->queue[HostNormCmdQueue].lock;
+ comm->queue[AdapHighRespQueue].lock = comm->queue[HostHighCmdQueue].lock;
+
+ return 0;
+}
+
+struct aac_dev *aac_init_adapter(struct aac_dev *dev)
+{
+ /*
+ * Ok now init the communication subsystem
+ */
+ dev->queues = (struct aac_queue_block *)
+ kmalloc(sizeof(struct aac_queue_block), GFP_KERNEL);
+ if (dev->queues == NULL) {
+ printk(KERN_ERR "Error could not allocate comm region.\n");
+ return NULL;
+ }
+ memset(dev->queues, 0, sizeof(struct aac_queue_block));
+
+	printk("aac_init_adapter, dev is %p\n", dev);
+ if (aac_comm_init(dev)<0)
+ return NULL;
+	printk("aac_init_adapter, dev->init is %p\n", dev->init);
+ /*
+ * Initialize the list of fibs
+ */
+ if(fib_setup(dev)<0)
+ return NULL;
+
+ INIT_LIST_HEAD(&dev->fib_list);
+#if 0
+ init_completion(&dev->aif_completion);
+#endif
+ /*
+ * Add this adapter in to our dev List.
+ */
+ dev->next = devices;
+ devices = dev;
+ return dev;
+}
+
+
diff --git a/xen/drivers/scsi/aacraid/commsup.c b/xen/drivers/scsi/aacraid/commsup.c
new file mode 100644
index 0000000000..7d84ad241c
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/commsup.c
@@ -0,0 +1,1028 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ *
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * commsup.c
+ *
+ * Abstract: Contains all routines that are required for FSA host/adapter
+ *    communication.
+ *
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+#include <xeno/spinlock.h>
+
+#include <xeno/interrupt.h> /* tasklet stuff */
+
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include <xeno/blk.h>
+#include <xeno/delay.h>
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+/**
+ * fib_map_alloc - allocate the fib objects
+ * @dev: Adapter to allocate for
+ *
+ * Allocate and map the shared PCI space for the FIB blocks used to
+ * talk to the Adaptec firmware.
+ */
+
+static int fib_map_alloc(struct aac_dev *dev)
+{
+ if((dev->hw_fib_va =
+ pci_alloc_consistent(dev->pdev, sizeof(struct hw_fib) * AAC_NUM_FIB,
+ &dev->hw_fib_pa))==NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * fib_map_free - free the fib objects
+ * @dev: Adapter to free
+ *
+ * Free the PCI mappings and the memory allocated for FIB blocks
+ * on this adapter.
+ */
+
+void fib_map_free(struct aac_dev *dev)
+{
+ pci_free_consistent(dev->pdev, sizeof(struct hw_fib) * AAC_NUM_FIB,
+ dev->hw_fib_va, dev->hw_fib_pa);
+}
+
+/**
+ * fib_setup - setup the fibs
+ * @dev: Adapter to set up
+ *
+ *	Allocate the PCI space for the fibs, map it and then initialise the
+ * fib area, the unmapped fib data and also the free list
+ */
+
+int fib_setup(struct aac_dev * dev)
+{
+ struct fib *fibptr;
+ struct hw_fib *fib;
+ dma_addr_t fibpa;
+ int i;
+
+ if(fib_map_alloc(dev)<0)
+ return -ENOMEM;
+
+ fib = dev->hw_fib_va;
+ fibpa = dev->hw_fib_pa;
+ memset(fib, 0, sizeof(struct hw_fib) * AAC_NUM_FIB);
+ /*
+ * Initialise the fibs
+ */
+ for (i = 0, fibptr = &dev->fibs[i]; i < AAC_NUM_FIB; i++, fibptr++)
+ {
+ fibptr->dev = dev;
+ fibptr->fib = fib;
+ fibptr->data = (void *) fibptr->fib->data;
+ fibptr->next = fibptr+1; /* Forward chain the fibs */
+#if 0
+ init_MUTEX_LOCKED(&fibptr->event_wait);
+#endif
+ spin_lock_init(&fibptr->event_lock);
+ fib->header.XferState = cpu_to_le32(0xffffffff);
+ fib->header.SenderSize = cpu_to_le16(sizeof(struct hw_fib));
+ fibptr->logicaladdr = (unsigned long) fibpa;
+ fib = (struct hw_fib *)((unsigned char *)fib + sizeof(struct hw_fib));
+ fibpa = fibpa + sizeof(struct hw_fib);
+ }
+ /*
+ * Add the fib chain to the free list
+ */
+ dev->fibs[AAC_NUM_FIB-1].next = NULL;
+ /*
+ * Enable this to debug out of queue space
+ */
+ dev->free_fib = &dev->fibs[0];
+ return 0;
+}
+
+/**
+ * fib_alloc - allocate a fib
+ * @dev: Adapter to allocate the fib for
+ *
+ *	Allocate a fib from the adapter fib pool. The pool is expected to
+ *	have a free fib; we BUG() if it is empty.
+ */
+
+struct fib * fib_alloc(struct aac_dev *dev)
+{
+ struct fib * fibptr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ fibptr = dev->free_fib;
+ if(!fibptr)
+ BUG();
+ dev->free_fib = fibptr->next;
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ /*
+ * Set the proper node type code and node byte size
+ */
+ fibptr->type = FSAFS_NTC_FIB_CONTEXT;
+ fibptr->size = sizeof(struct fib);
+ /*
+ * Null out fields that depend on being zero at the start of
+ * each I/O
+ */
+ fibptr->fib->header.XferState = cpu_to_le32(0);
+ fibptr->callback = NULL;
+ fibptr->callback_data = NULL;
+
+ return fibptr;
+}
+
+/**
+ * fib_free - free a fib
+ * @fibptr: fib to free up
+ *
+ * Frees up a fib and places it on the appropriate queue
+ * (either free or timed out)
+ */
+
+void fib_free(struct fib * fibptr)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&fibptr->dev->fib_lock, flags);
+
+ if (fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT) {
+ aac_config.fib_timeouts++;
+ fibptr->next = fibptr->dev->timeout_fib;
+ fibptr->dev->timeout_fib = fibptr;
+ } else {
+ if (fibptr->fib->header.XferState != 0) {
+ printk(KERN_WARNING "fib_free, XferState != 0, "
+ "fibptr = 0x%p, XferState = 0x%x\n",
+ (void *)fibptr, fibptr->fib->header.XferState);
+ }
+ fibptr->next = fibptr->dev->free_fib;
+ fibptr->dev->free_fib = fibptr;
+ }
+ spin_unlock_irqrestore(&fibptr->dev->fib_lock, flags);
+}
+
+/**
+ * fib_init - initialise a fib
+ * @fibptr: The fib to initialize
+ *
+ * Set up the generic fib fields ready for use
+ */
+
+void fib_init(struct fib *fibptr)
+{
+ struct hw_fib *fib = fibptr->fib;
+
+ fib->header.StructType = FIB_MAGIC;
+ fib->header.Size = cpu_to_le16(sizeof(struct hw_fib));
+ fib->header.XferState = cpu_to_le32(HostOwned | FibInitialized |
+ FibEmpty | FastResponseCapable);
+ fib->header.SenderFibAddress = cpu_to_le32(0);
+ fib->header.ReceiverFibAddress = cpu_to_le32(0);
+ fib->header.SenderSize = cpu_to_le16(sizeof(struct hw_fib));
+}
+
+/**
+ * fib_dealloc - deallocate a fib
+ * @fibptr: fib to deallocate
+ *
+ * Will deallocate and return to the free pool the FIB pointed to by the
+ * caller.
+ */
+
+void fib_dealloc(struct fib * fibptr)
+{
+ struct hw_fib *fib = fibptr->fib;
+ if(fib->header.StructType != FIB_MAGIC)
+ BUG();
+ fib->header.XferState = cpu_to_le32(0);
+}
+
+/*
+ * Communication primitives define and support the queuing method we use to
+ * support host to adapter communication. All queue accesses happen through
+ * these routines, which are the only routines with knowledge of how the
+ * queues are implemented.
+ */
+
+/**
+ * aac_get_entry - get a queue entry
+ * @dev: Adapter
+ * @qid: Queue Number
+ * @entry: Entry return
+ * @index: Index return
+ * @nonotify: notification control
+ *
+ * For the given queue the routine returns a queue entry if the queue has
+ * free entries. If the queue is full (no free entries) no entry is
+ * returned and the function returns 0; otherwise 1 is returned.
+ */
+
+static int aac_get_entry (struct aac_dev * dev, u32 qid, struct aac_entry **entry, u32 * index, unsigned long *nonotify)
+{
+ struct aac_queue * q;
+
+ /*
+ * All of the queues wrap when they reach the end, so we check
+ * to see if they have reached the end and if they have we just
+ * set the index back to zero. This is a wrap. You could or off
+ * the high bits in all updates but this is a bit faster I think.
+ */
+
+ q = &dev->queues->queue[qid];
+
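+ /*
+ * The check below appears to be an interrupt-moderation hint: if
+ * the producer has run ahead of the consumer the adapter already
+ * has work outstanding, so the caller may skip the doorbell when
+ * aac_config.irq_mod enables it -- see fib_send()/aac_insert_entry().
+ */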
+ *index = le32_to_cpu(*(q->headers.producer));
+ if (*index - 2 == le32_to_cpu(*(q->headers.consumer)))
+ *nonotify = 1;
+
+ if (qid == AdapHighCmdQueue) {
+ if (*index >= ADAP_HIGH_CMD_ENTRIES)
+ *index = 0;
+ } else if (qid == AdapNormCmdQueue) {
+ if (*index >= ADAP_NORM_CMD_ENTRIES)
+ *index = 0; /* Wrap to front of the Producer Queue. */
+ }
+ else if (qid == AdapHighRespQueue)
+ {
+ if (*index >= ADAP_HIGH_RESP_ENTRIES)
+ *index = 0;
+ }
+ else if (qid == AdapNormRespQueue)
+ {
+ if (*index >= ADAP_NORM_RESP_ENTRIES)
+ *index = 0; /* Wrap to front of the Producer Queue. */
+ }
+ else BUG();
+
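+ /*
+ * One slot is kept in reserve: if advancing the producer would put
+ * it on top of the consumer, the ring is full (this is how the ring
+ * distinguishes full from empty).
+ */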
+ if (*index + 1 == le32_to_cpu(*(q->headers.consumer))) { /* Queue full */
+ printk(KERN_WARNING "Queue %d full, %ld outstanding.\n",
+ qid, q->numpending);
+ return 0;
+ } else {
+ *entry = q->base + *index;
+ return 1;
+ }
+}
+
+/**
+ * aac_queue_get - get the next free QE
+ * @dev: Adapter
+ * @index: Returned index
+ * @qid: Queue number to use
+ * @fib: Fib to associate with the queue entry
+ * @wait: Wait if queue full
+ * @fibptr: Driver fib object to go with fib
+ * @nonotify: Don't notify the adapter
+ *
+ * Gets the next free QE off the requested priority adapter command
+ * queue and associates the Fib with the QE. The QE represented by
+ * index is ready to insert on the queue when this routine returns
+ * success.
+ */
+
+static int aac_queue_get(struct aac_dev * dev, u32 * index, u32 qid, struct hw_fib * fib, int wait, struct fib * fibptr, unsigned long *nonotify)
+{
+ struct aac_entry * entry = NULL;
+ int map = 0;
+ struct aac_queue * q = &dev->queues->queue[qid];
+
+ spin_lock_irqsave(q->lock, q->SavedIrql);
+
+ if (qid == AdapHighCmdQueue || qid == AdapNormCmdQueue)
+ {
+ /* if there are no entries, wait for some if the caller wants to */
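+ /*
+ * Note: no sleeping path exists here; if the adapter queue is
+ * full we busy-spin, printing, with the queue lock held until
+ * the adapter frees an entry.
+ */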
+ while (!aac_get_entry(dev, qid, &entry, index, nonotify))
+ {
+ printk(KERN_ERR "GetEntries failed\n");
+ }
+ /*
+ * Setup queue entry with a command, status and fib mapped
+ */
+ entry->size = cpu_to_le32(le16_to_cpu(fib->header.Size));
+ map = 1;
+ }
+ else if (qid == AdapHighRespQueue || qid == AdapNormRespQueue)
+ {
+ while(!aac_get_entry(dev, qid, &entry, index, nonotify))
+ {
+ /* if no entries wait for some if caller wants to */
+ }
+ /*
+ * Setup queue entry with command, status and fib mapped
+ */
+ entry->size = cpu_to_le32(le16_to_cpu(fib->header.Size));
+ entry->addr = cpu_to_le32(fib->header.SenderFibAddress); /* Restore the adapter's pointer to the FIB */
+ fib->header.ReceiverFibAddress = fib->header.SenderFibAddress; /* Let the adapter know where to find its data */
+ map = 0;
+ }
+ /*
+ * If MapFib is true then we need to map the Fib and put pointers
+ * in the queue entry.
+ */
+ if (map)
+ entry->addr = cpu_to_le32((unsigned long)(fibptr->logicaladdr));
+ return 0;
+}
+
+
+/**
+ * aac_insert_entry - insert a queue entry
+ * @dev: Adapter
+ * @index: Index of entry to insert
+ * @qid: Queue number
+ * @nonotify: Suppress adapter notification
+ *
+ * Publishes a previously prepared queue entry: advances the producer
+ * index past the entry at @index and, unless @nonotify suppresses it,
+ * notifies the adapter that a new entry is on the queue.
+ */
+
+static int aac_insert_entry(struct aac_dev * dev, u32 index, u32 qid, unsigned long nonotify)
+{
+ struct aac_queue * q = &dev->queues->queue[qid];
+
+ if(q == NULL)
+ BUG();
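+ /*
+ * Publish the entry prepared in aac_queue_get(): advancing the
+ * shared producer index past @index makes it visible to the
+ * adapter.
+ */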
+ *(q->headers.producer) = cpu_to_le32(index + 1);
+ spin_unlock_irqrestore(q->lock, q->SavedIrql);
+
+ if (qid == AdapHighCmdQueue ||
+ qid == AdapNormCmdQueue ||
+ qid == AdapHighRespQueue ||
+ qid == AdapNormRespQueue)
+ {
+ if (!nonotify)
+ aac_adapter_notify(dev, qid);
+ }
+ else
+ printk("Suprise insert!\n");
+ return 0;
+}
+
+/*
+ * Define the highest level of host to adapter communication routines.
+ * These routines will support host to adapter FS communication. These
+ * routines have no knowledge of the communication method used. This level
+ * sends and receives FIBs. This level has no knowledge of how these FIBs
+ * get passed back and forth.
+ */
+
+/**
+ * fib_send - send a fib to the adapter
+ * @command: Command to send
+ * @fibptr: The fib
+ * @size: Size of fib data area
+ * @priority: Priority of Fib
+ * @wait: Async/sync select
+ * @reply: True if a reply is wanted
+ * @callback: Called with reply
+ * @callback_data: Passed to callback
+ *
+ * Sends the requested FIB to the adapter and optionally will wait for a
+ * response FIB. If the caller does not wish to wait for a response then
+ * an event to wait on must be supplied. This event will be set when a
+ * response FIB is received from the adapter.
+ */
+
+int fib_send(u16 command, struct fib * fibptr, unsigned long size, int priority, int wait, int reply, fib_callback callback, void * callback_data)
+{
+ u32 index;
+ u32 qid;
+ struct aac_dev * dev = fibptr->dev;
+ unsigned long nointr = 0;
+ struct hw_fib * fib = fibptr->fib;
+ struct aac_queue * q;
+ unsigned long flags = 0;
+
+ if (!(le32_to_cpu(fib->header.XferState) & HostOwned))
+ return -EBUSY;
+ /*
+ * There are 5 cases with the wait and response requested flags.
+ * The only invalid cases are if the caller requests to wait and
+ * does not request a response and if the caller does not want a
+ * response and the Fib is not allocated from pool. If a response
+ * is not requested the Fib will just be deallocated by the DPC
+ * routine when the response comes back from the adapter. No
+ * further processing will be done besides deleting the Fib. We
+ * will have a debug mode where the adapter can notify the host
+ * it had a problem and the host can log that fact.
+ */
+ if (wait && !reply) {
+ return -EINVAL;
+ } else if (!wait && reply) {
+ fib->header.XferState |= cpu_to_le32(Async | ResponseExpected);
+ FIB_COUNTER_INCREMENT(aac_config.AsyncSent);
+ } else if (!wait && !reply) {
+ fib->header.XferState |= cpu_to_le32(NoResponseExpected);
+ FIB_COUNTER_INCREMENT(aac_config.NoResponseSent);
+ } else if (wait && reply) {
+ fib->header.XferState |= cpu_to_le32(ResponseExpected);
+ FIB_COUNTER_INCREMENT(aac_config.NormalSent);
+ }
+ /*
+ * Map the fib into 32bits by using the fib number
+ */
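+ /*
+ * The fib index stored in SenderData is echoed back by the adapter
+ * in the response, letting aac_response_normal() recover the driver
+ * fib without a lookup.
+ */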
+ fib->header.SenderData = fibptr-&dev->fibs[0]; /* for callback */
+ /*
+ * Set FIB state to indicate where it came from and if we want a
+ * response from the adapter. Also load the command from the
+ * caller.
+ *
+ * Map the hw fib pointer as a 32bit value
+ */
+ fib->header.SenderFibAddress = fib2addr(fib);
+ fib->header.Command = cpu_to_le16(command);
+ fib->header.XferState |= cpu_to_le32(SentFromHost);
+ fibptr->fib->header.Flags = 0; /* Zero flags field - it's internal only */
+ /*
+ * Set the size of the Fib we want to send to the adapter
+ */
+ fib->header.Size = cpu_to_le16(sizeof(struct aac_fibhdr) + size);
+ if (le16_to_cpu(fib->header.Size) > le16_to_cpu(fib->header.SenderSize)) {
+ return -EMSGSIZE;
+ }
+ /*
+ * Get a queue entry, connect the FIB to it, and then notify
+ * the adapter that a command is ready.
+ */
+ if (priority == FsaHigh) {
+ fib->header.XferState |= cpu_to_le32(HighPriority);
+ qid = AdapHighCmdQueue;
+ } else {
+ fib->header.XferState |= cpu_to_le32(NormalPriority);
+ qid = AdapNormCmdQueue;
+ }
+ q = &dev->queues->queue[qid];
+
+ if(wait)
+ spin_lock_irqsave(&fibptr->event_lock, flags);
+
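+ /*
+ * Note: aac_queue_get() as written always returns 0, so the error
+ * path below (which would leak event_lock when wait is set) appears
+ * to be dead code.
+ */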
+ if(aac_queue_get( dev, &index, qid, fib, 1, fibptr, &nointr)<0)
+ return -EWOULDBLOCK;
+ dprintk((KERN_DEBUG "fib_send: inserting a queue entry at index %d.\n",
+ index));
+ dprintk((KERN_DEBUG "Fib contents:.\n"));
+ dprintk((KERN_DEBUG " Command = %d.\n",
+ fib->header.Command));
+ dprintk((KERN_DEBUG " XferState = %x.\n",
+ fib->header.XferState));
+ /*
+ * Fill in the Callback and CallbackContext if we are not
+ * going to wait.
+ */
+ if (!wait) {
+ fibptr->callback = callback;
+ fibptr->callback_data = callback_data;
+ }
+ FIB_COUNTER_INCREMENT(aac_config.FibsSent);
+ list_add_tail(&fibptr->queue, &q->pendingq);
+ q->numpending++;
+
+ fibptr->done = 0;
+
+ if(aac_insert_entry(dev, index, qid,
+ (nointr & aac_config.irq_mod)) < 0)
+ return -EWOULDBLOCK;
+ /*
+ * If the caller wanted us to wait for response wait now.
+ */
+
+ if (wait) {
+ spin_unlock_irqrestore(&fibptr->event_lock, flags);
+#if 0
+ down(&fibptr->event_wait);
+#endif
+#ifdef TRY_TASKLET
+ /*
+ * XXX KAF: Well, this is pretty gross. We should probably
+ * do_softirq() after scheduling the tasklet, as long as we
+ * are _sure_ we hold no locks here...
+ */
+ printk("about to softirq aac_command_thread...\n");
+ while (!fibptr->done) {
+ tasklet_schedule(&aac_command_tasklet);
+ mdelay(100);
+ }
+ printk("back from softirq cmd thread and fibptr->done!\n");
+#else
+ printk("about to bail at aac_command_thread...\n");
+ while (!fibptr->done) {
+ mdelay(100);
+ aac_command_thread(dev);
+ }
+ printk("back from command thread and fibptr->done!\n");
+#endif
+/* if(fibptr->done == 0) */
+/* BUG(); */
+
+ if((fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT))
+ return -ETIMEDOUT;
+ else
+ return 0;
+ }
+ /*
+ * If the user does not want a response then return success, otherwise
+ * return pending
+ */
+ if (reply)
+ return -EINPROGRESS;
+ else
+ return 0;
+}
+
+/**
+ * aac_consumer_get - get the top of the queue
+ * @dev: Adapter
+ * @q: Queue
+ * @entry: Return entry
+ *
+ * Will return a pointer to the entry on the top of the requested
+ * queue that we are a consumer of, storing the address of the
+ * queue entry in @entry. It does not change the state of the queue.
+ */
+
+int aac_consumer_get(struct aac_dev * dev, struct aac_queue * q, struct aac_entry **entry)
+{
+ u32 index;
+ int status;
+
+ if (*q->headers.producer == *q->headers.consumer) {
+ status = 0;
+ } else {
+ /*
+ * The consumer index must be wrapped if we have reached
+ * the end of the queue, else we just use the entry
+ * pointed to by the header index
+ */
+ if (le32_to_cpu(*q->headers.consumer) >= q->entries)
+ index = 0;
+ else
+ index = le32_to_cpu(*q->headers.consumer);
+ *entry = q->base + index;
+ status = 1;
+ }
+ return(status);
+}
+
+int aac_consumer_avail(struct aac_dev *dev, struct aac_queue * q)
+{
+ return (*q->headers.producer != *q->headers.consumer);
+}
+
+
+/**
+ * aac_consumer_free - free consumer entry
+ * @dev: Adapter
+ * @q: Queue
+ * @qid: Queue ident
+ *
+ * Frees up the current top of the queue we are a consumer of. If the
+ * queue was full notify the producer that the queue is no longer full.
+ */
+
+void aac_consumer_free(struct aac_dev * dev, struct aac_queue *q, u32 qid)
+{
+ int wasfull = 0;
+ u32 notify;
+
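+ /*
+ * Producer one slot behind the consumer means the ring was full.
+ * The raw little-endian values are compared directly, which is
+ * fine on the x86 systems this port targets.
+ */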
+ if (*q->headers.producer+1 == *q->headers.consumer)
+ wasfull = 1;
+
+ if (le32_to_cpu(*q->headers.consumer) >= q->entries)
+ *q->headers.consumer = cpu_to_le32(1);
+ else
+ *q->headers.consumer =
+ cpu_to_le32(le32_to_cpu(*q->headers.consumer)+1);
+
+ if (wasfull) {
+ switch (qid) {
+
+ case HostNormCmdQueue:
+ notify = HostNormCmdNotFull;
+ break;
+ case HostHighCmdQueue:
+ notify = HostHighCmdNotFull;
+ break;
+ case HostNormRespQueue:
+ notify = HostNormRespNotFull;
+ break;
+ case HostHighRespQueue:
+ notify = HostHighRespNotFull;
+ break;
+ default:
+ BUG();
+ return;
+ }
+ aac_adapter_notify(dev, notify);
+ }
+}
+
+/**
+ * fib_adapter_complete - complete adapter issued fib
+ * @fibptr: fib to complete
+ * @size: size of fib
+ *
+ * Will do all necessary work to complete a FIB that was sent from
+ * the adapter.
+ */
+
+int fib_adapter_complete(struct fib * fibptr, unsigned short size)
+{
+ struct hw_fib * fib = fibptr->fib;
+ struct aac_dev * dev = fibptr->dev;
+ unsigned long nointr = 0;
+
+ if (le32_to_cpu(fib->header.XferState) == 0)
+ return 0;
+ /*
+ * If we plan to do anything check the structure type first.
+ */
+ if ( fib->header.StructType != FIB_MAGIC ) {
+ return -EINVAL;
+ }
+ /*
+ * This block handles the case where the adapter had sent us a
+ * command and we have finished processing the command. We
+ * call completeFib when we are done processing the command
+ * and want to send a response back to the adapter. This will
+ * send the completed cdb to the adapter.
+ */
+ if (fib->header.XferState & cpu_to_le32(SentFromAdapter)) {
+ fib->header.XferState |= cpu_to_le32(HostProcessed);
+ if (fib->header.XferState & cpu_to_le32(HighPriority)) {
+ u32 index;
+ if (size)
+ {
+ size += sizeof(struct aac_fibhdr);
+ if (size > le16_to_cpu(fib->header.SenderSize))
+ return -EMSGSIZE;
+ fib->header.Size = cpu_to_le16(size);
+ }
+ if(aac_queue_get(dev, &index, AdapHighRespQueue,
+ fib, 1, NULL, &nointr) < 0) {
+ return -EWOULDBLOCK;
+ }
+ if (aac_insert_entry(dev, index, AdapHighRespQueue,
+ (nointr & (int)aac_config.irq_mod)) != 0) {
+ }
+ }
+ else if (fib->header.XferState & NormalPriority)
+ {
+ u32 index;
+
+ if (size) {
+ size += sizeof(struct aac_fibhdr);
+ if (size > le16_to_cpu(fib->header.SenderSize))
+ return -EMSGSIZE;
+ fib->header.Size = cpu_to_le16(size);
+ }
+ if (aac_queue_get(dev, &index, AdapNormRespQueue,
+ fib, 1, NULL, &nointr) < 0)
+ return -EWOULDBLOCK;
+ if (aac_insert_entry(dev, index, AdapNormRespQueue,
+ (nointr & (int)aac_config.irq_mod)) != 0)
+ {
+ }
+ }
+ }
+ else
+ {
+ printk(KERN_WARNING
+ "fib_adapter_complete: Unknown xferstate detected.\n");
+ BUG();
+ }
+ return 0;
+}
+
+/**
+ * fib_complete - fib completion handler
+ * @fibptr: FIB to complete
+ *
+ * Will do all necessary work to complete a FIB.
+ */
+
+int fib_complete(struct fib * fibptr)
+{
+ struct hw_fib * fib = fibptr->fib;
+
+ /*
+ * Check for a fib which has already been completed
+ */
+
+ if (fib->header.XferState == cpu_to_le32(0))
+ return 0;
+ /*
+ * If we plan to do anything check the structure type first.
+ */
+
+ if (fib->header.StructType != FIB_MAGIC)
+ return -EINVAL;
+ /*
+ * This block completes a cdb which originated on the host and we
+ * just need to deallocate the cdb or reinit it. At this point the
+ * command is complete that we had sent to the adapter and this
+ * cdb could be reused.
+ */
+ if((fib->header.XferState & cpu_to_le32(SentFromHost)) &&
+ (fib->header.XferState & cpu_to_le32(AdapterProcessed)))
+ {
+ fib_dealloc(fibptr);
+ }
+ else if(fib->header.XferState & cpu_to_le32(SentFromHost))
+ {
+ /*
+ * This handles the case when the host has aborted the I/O
+ * to the adapter because the adapter is not responding
+ */
+ fib_dealloc(fibptr);
+ } else if(fib->header.XferState & cpu_to_le32(HostOwned)) {
+ fib_dealloc(fibptr);
+ } else {
+ BUG();
+ }
+ return 0;
+}
+
+/**
+ * aac_printf - handle printf from firmware
+ * @dev: Adapter
+ * @val: Message info
+ *
+ * Print a message passed to us by the controller firmware on the
+ * Adaptec board
+ */
+
+void aac_printf(struct aac_dev *dev, u32 val)
+{
+ int length = val & 0xffff;
+ int level = (val >> 16) & 0xffff;
+ char *cp = dev->printfbuf;
+
+ /*
+ * The size of the printfbuf is set in port.c
+ * There is no variable or define for it
+ */
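+ /*
+ * Clamp the firmware-supplied length to the 256-byte buffer and
+ * force NUL termination before handing the string to printk().
+ */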
+ if (length > 255)
+ length = 255;
+ if (cp[length] != 0)
+ cp[length] = 0;
+ if (level == LOG_HIGH_ERROR)
+ printk(KERN_WARNING "aacraid:%s", cp);
+ else
+ printk(KERN_INFO "aacraid:%s", cp);
+ memset(cp, 0, 256);
+}
+
+
+/**
+ * aac_handle_aif - Handle a message from the firmware
+ * @dev: Which adapter this fib is from
+ * @fibptr: Pointer to fibptr from adapter
+ *
+ * This routine handles a driver notify fib from the adapter; in this
+ * port it simply acknowledges the fib back to the adapter.
+ */
+
+static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
+{
+ struct hw_fib * fib = fibptr->fib;
+ /*
+ * Acknowledge the AIF with an ST_OK status. (To reject it instead,
+ * the status would be set to Invalid parameter:
+ * *(u32 *)fib->data = ST_INVAL;)
+ */
+ *(u32 *)fib->data = cpu_to_le32(ST_OK);
+ fib_adapter_complete(fibptr, sizeof(u32));
+}
+
+/**
+ * aac_command_thread - command processing thread
+ * @dev: Adapter to monitor
+ *
+ * Waits on the commandready event in its queue. When the event gets set
+ * it will pull FIBs off its queue. It will continue to pull FIBs off
+ * until the queue is empty. When the queue is empty it will wait for
+ * more FIBs.
+ */
+
+#ifndef TRY_TASKLET
+DECLARE_TASKLET_DISABLED(aac_command_tasklet, aac_command_thread, 0);
+int aac_command_thread(struct aac_dev * dev)
+{
+#else
+int aac_command_thread(unsigned long data)
+{
+ struct aac_dev *dev = (struct aac_dev *)data;
+#endif
+ struct hw_fib *fib, *newfib;
+ struct fib fibptr; /* for error logging */
+ struct aac_queue_block *queues = dev->queues;
+ struct aac_fib_context *fibctx;
+ unsigned long flags;
+#if 0
+ DECLARE_WAITQUEUE(wait, current);
+#endif
+
+ /*
+ * We can only have one thread per adapter for AIF's.
+ */
+ printk("aac_command_'thread': entered.\n");
+ if (dev->aif_thread)
+ return -EINVAL;
+
+#if 0
+ /*
+ * Set up the name that will appear in 'ps'
+ * stored in task_struct.comm[16].
+ */
+ sprintf(current->comm, "aacraid");
+ daemonize();
+#endif
+
+ /*
+ * Let the DPC know it has a place to send the AIF's to.
+ */
+ dev->aif_thread = 1;
+ memset(&fibptr, 0, sizeof(struct fib));
+#if 0
+ add_wait_queue(&queues->queue[HostNormCmdQueue].cmdready, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+#endif
+// while(1)
+ {
+
+ printk("aac_command_thread: in 'loop'\n");
+ spin_lock_irqsave(queues->queue[HostNormCmdQueue].lock, flags);
+ printk("flags = %x\n", flags);
+ while(!list_empty(&(queues->queue[HostNormCmdQueue].cmdq))) {
+ struct list_head *entry;
+ struct aac_aifcmd * aifcmd;
+
+#if 0
+ set_current_state(TASK_RUNNING);
+#endif
+
+
+ entry = queues->queue[HostNormCmdQueue].cmdq.next;
+ list_del(entry);
+
+ spin_unlock_irqrestore(queues->queue[HostNormCmdQueue].lock,flags);
+ fib = list_entry(entry, struct hw_fib, header.FibLinks);
+ printk("aac_command_thread: got fib \n");
+ /*
+ * We will process the FIB here or pass it to a
+ * worker thread that is TBD. We really can't
+ * do anything at this point since we don't have
+ * anything defined for this thread to do.
+ */
+ memset(&fibptr, 0, sizeof(struct fib));
+ fibptr.type = FSAFS_NTC_FIB_CONTEXT;
+ fibptr.size = sizeof( struct fib );
+ fibptr.fib = fib;
+ fibptr.data = fib->data;
+ fibptr.dev = dev;
+ /*
+ * We only handle AifRequest fibs from the adapter.
+ */
+ aifcmd = (struct aac_aifcmd *) fib->data;
+ if (aifcmd->command == le16_to_cpu(AifCmdDriverNotify)) {
+ printk("aac_command_thread: handling aif... :-( \n");
+ aac_handle_aif(dev, &fibptr);
+ } else {
+ /* The u32 here is important and intended. We are using
+ 32bit wrapping time to fit the adapter field */
+ u32 time_now, time_last;
+ unsigned long flagv;
+
+ time_now = jiffies/HZ;
+
+ spin_lock_irqsave(&dev->fib_lock, flagv);
+ entry = dev->fib_list.next;
+ /*
+ * For each Context that is on the
+ * fibctxList, make a copy of the
+ * fib, and then set the event to wake up the
+ * thread that is waiting for it.
+ */
+ while (entry != &dev->fib_list) {
+ /*
+ * Extract the fibctx
+ */
+ fibctx = list_entry(entry, struct aac_fib_context, next);
+ /*
+ * Check if the queue is getting
+ * backlogged
+ */
+ if (fibctx->count > 20)
+ {
+ time_last = fibctx->jiffies;
+ /*
+ * Has it been > 2 minutes
+ * since the last read off
+ * the queue?
+ */
+ if ((time_now - time_last) > 120) {
+ entry = entry->next;
+ aac_close_fib_context(dev, fibctx);
+ continue;
+ }
+ }
+ /*
+ * Warning: no sleep allowed while
+ * holding spinlock
+ */
+ newfib = kmalloc(sizeof(struct hw_fib), GFP_ATOMIC);
+ if (newfib) {
+ /*
+ * Make the copy of the FIB
+ */
+ memcpy(newfib, fib, sizeof(struct hw_fib));
+ /*
+ * Put the FIB onto the
+ * fibctx's fibs
+ */
+ list_add_tail(&newfib->header.FibLinks, &fibctx->fibs);
+ fibctx->count++;
+#if 0
+ /*
+ * Set the event to wake up the
+ * thread that is waiting.
+ */
+ up(&fibctx->wait_sem);
+#endif
+ } else {
+ printk(KERN_WARNING "aifd: didn't allocate NewFib.\n");
+ }
+ entry = entry->next;
+ }
+ /*
+ * Set the status of this FIB
+ */
+ *(u32 *)fib->data = cpu_to_le32(ST_OK);
+ fib_adapter_complete(&fibptr, sizeof(u32));
+ spin_unlock_irqrestore(&dev->fib_lock, flagv);
+ }
+ spin_lock_irqsave(queues->queue[HostNormCmdQueue].lock, flags);
+ }
+ /*
+ * There are no more AIF's
+ */
+ spin_unlock_irqrestore(queues->queue[HostNormCmdQueue].lock, flags);
+#if 0
+ schedule();
+
+ if(signal_pending(current))
+ break;
+ set_current_state(TASK_INTERRUPTIBLE);
+#endif
+
+ }
+
+#if 0
+ remove_wait_queue(&queues->queue[HostNormCmdQueue].cmdready, &wait);
+ dev->aif_thread = 0;
+ complete_and_exit(&dev->aif_completion, 0);
+#else
+ mdelay(50);
+ dev->aif_thread = 0;
+
+#endif
+ return 0;
+}
diff --git a/xen/drivers/scsi/aacraid/dpcsup.c b/xen/drivers/scsi/aacraid/dpcsup.c
new file mode 100644
index 0000000000..c9b4dfe123
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/dpcsup.c
@@ -0,0 +1,207 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * dpcsup.c
+ *
+ * Abstract: All DPC processing routines for the cyclone board occur here.
+ *
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+#include <xeno/blk.h>
+/* #include <asm/semaphore.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+/**
+ * aac_response_normal - Handle command replies
+ * @q: Queue to read from
+ *
+ * This DPC routine will be run when the adapter interrupts us to let us
+ * know there is a response on our normal priority queue. We will pull off
+ * all QE there are and wake up all the waiters before exiting. We will
+ * take a spinlock out on the queue before operating on it.
+ */
+
+unsigned int aac_response_normal(struct aac_queue * q)
+{
+ struct aac_dev * dev = q->dev;
+ struct aac_entry *entry;
+ struct hw_fib * hwfib;
+ struct fib * fib;
+ int consumed = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(q->lock, flags);
+
+ /*
+ * Keep pulling response QEs off the response queue and waking
+ * up the waiters until there are no more QEs. We then return
+ * back to the system. If no response was requested we just
+ * deallocate the Fib here and continue.
+ */
+ while(aac_consumer_get(dev, q, &entry))
+ {
+ int fast;
+
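+ /*
+ * The bottom bit of the entry address appears to flag a "fast"
+ * completion (cf. FastResponseCapable in fib_init()): the adapter
+ * returned no status, so the host fills in ST_OK itself below.
+ */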
+ fast = (int) (entry->addr & 0x01);
+ hwfib = addr2fib(entry->addr & ~0x01);
+ aac_consumer_free(dev, q, HostNormRespQueue);
+ fib = &dev->fibs[hwfib->header.SenderData];
+ /*
+ * Remove this fib from the Outstanding I/O queue.
+ * But only if it has not already been timed out.
+ *
+ * If the fib has been timed out already, then just
+ * continue. The caller has already been notified that
+ * the fib timed out.
+ */
+ if (!(fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) {
+ list_del(&fib->queue);
+ dev->queues->queue[AdapNormCmdQueue].numpending--;
+ } else {
+ printk(KERN_WARNING "aacraid: FIB timeout (%x).\n", fib->flags);
+ continue;
+ }
+ spin_unlock_irqrestore(q->lock, flags);
+
+ if (fast) {
+ /*
+ * Doctor the fib
+ */
+ *(u32 *)hwfib->data = cpu_to_le32(ST_OK);
+ hwfib->header.XferState |= cpu_to_le32(AdapterProcessed);
+ }
+
+ FIB_COUNTER_INCREMENT(aac_config.FibRecved);
+
+ if (hwfib->header.Command == cpu_to_le16(NuFileSystem))
+ {
+ u32 *pstatus = (u32 *)hwfib->data;
+ if (*pstatus & cpu_to_le32(0xffff0000))
+ *pstatus = cpu_to_le32(ST_OK);
+ }
+ if (hwfib->header.XferState & cpu_to_le32(NoResponseExpected | Async))
+ {
+ if (hwfib->header.XferState & cpu_to_le32(NoResponseExpected))
+ FIB_COUNTER_INCREMENT(aac_config.NoResponseRecved);
+ else
+ FIB_COUNTER_INCREMENT(aac_config.AsyncRecved);
+ /*
+ * NOTE: we cannot touch the fib after this
+ * call, because it may have been deallocated.
+ */
+ fib->callback(fib->callback_data, fib);
+ } else {
+#if 0
+ unsigned long flagv;
+ spin_lock_irqsave(&fib->event_lock, flagv);
+#endif
+ fib->done = 1;
+#if 0
+ up(&fib->event_wait);
+ spin_unlock_irqrestore(&fib->event_lock, flagv);
+#endif
+ FIB_COUNTER_INCREMENT(aac_config.NormalRecved);
+ }
+ consumed++;
+ spin_lock_irqsave(q->lock, flags);
+ }
+
+ if (consumed > aac_config.peak_fibs)
+ aac_config.peak_fibs = consumed;
+ if (consumed == 0)
+ aac_config.zero_fibs++;
+
+ spin_unlock_irqrestore(q->lock, flags);
+ return 0;
+}
+
+
+/**
+ * aac_command_normal - handle commands
+ * @q: queue to process
+ *
+ * This DPC routine will be queued when the adapter interrupts us to
+ * let us know there is a command on our normal priority queue. We will
+ * pull off all the QEs there are and wake up all the waiters before exiting.
+ * We will take a spinlock out on the queue before operating on it.
+ */
+
+unsigned int aac_command_normal(struct aac_queue *q)
+{
+ struct aac_dev * dev = q->dev;
+ struct aac_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(q->lock, flags);
+
+ /*
+ * Keep pulling command QEs off the command queue until there are
+ * no more QEs. We then return back to the system.
+ */
+ while(aac_consumer_get(dev, q, &entry))
+ {
+ struct hw_fib * fib;
+ fib = addr2fib(entry->addr);
+
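+ /*
+ * If the AIF handler is registered, queue the fib on cmdq for it;
+ * otherwise acknowledge the command inline with an ST_OK status.
+ */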
+ if (dev->aif_thread) {
+ list_add_tail(&fib->header.FibLinks, &q->cmdq);
+ aac_consumer_free(dev, q, HostNormCmdQueue);
+#if 0
+ wake_up_interruptible(&q->cmdready);
+#endif
+ } else {
+ struct fib fibctx;
+ aac_consumer_free(dev, q, HostNormCmdQueue);
+ spin_unlock_irqrestore(q->lock, flags);
+ memset(&fibctx, 0, sizeof(struct fib));
+ fibctx.type = FSAFS_NTC_FIB_CONTEXT;
+ fibctx.size = sizeof(struct fib);
+ fibctx.fib = fib;
+ fibctx.data = fib->data;
+ fibctx.dev = dev;
+ /*
+ * Set the status of this FIB
+ */
+ *(u32 *)fib->data = cpu_to_le32(ST_OK);
+ fib_adapter_complete(&fibctx, sizeof(u32));
+ spin_lock_irqsave(q->lock, flags);
+ }
+ }
+ spin_unlock_irqrestore(q->lock, flags);
+ return 0;
+}
diff --git a/xen/drivers/scsi/aacraid/linit.c b/xen/drivers/scsi/aacraid/linit.c
new file mode 100644
index 0000000000..b5026d9065
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/linit.c
@@ -0,0 +1,794 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * linit.c
+ *
+ * Abstract: Linux Driver entry module for Adaptec RAID Array Controller
+ *
+ * Provides the following driver entry points:
+ * aac_detect()
+ * aac_release()
+ * aac_queuecommand()
+ * aac_resetcommand()
+ * aac_biosparm()
+ *
+ */
+
+#define AAC_DRIVER_VERSION "0.9.9ac6-TEST"
+#define AAC_DRIVER_BUILD_DATE __DATE__
+
+#include <xeno/module.h>
+#include <xeno/config.h>
+#include <xeno/kernel.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+#include <xeno/spinlock.h>
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include <xeno/blk.h>
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+#include "sd.h"
+
+#define AAC_DRIVERNAME "aacraid"
+
+MODULE_AUTHOR("Red Hat Inc and Adaptec");
+MODULE_DESCRIPTION("Supports Dell PERC2, 2/Si, 3/Si, 3/Di, PERC 320/DC, Adaptec 2120S, 2200S, 5400S, and HP NetRAID-4M devices. http://domsch.com/xeno/ or http://linux.adaptec.com");
+MODULE_LICENSE("GPL");
+MODULE_PARM(nondasd, "i");
+MODULE_PARM_DESC(nondasd, "Control scanning of hba for nondasd devices. 0=off, 1=on");
+
+static int nondasd=-1;
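+/* -1 means "not set on the module command line"; see aac_detect() */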
+
+struct aac_dev *aac_devices[MAXIMUM_NUM_ADAPTERS];
+
+static unsigned aac_count = 0;
+static int aac_cfg_major = -1;
+
+/*
+ * Because of the way Linux names scsi devices, the order in this table has
+ * become important. Check for on-board Raid first, add-in cards second.
+ *
+ * dmb - For now we add the number of channels to this structure.
+ * In the future we should add a fib that reports the number of channels
+ * for the card. At that time we can remove the channels from here
+ */
+
+static struct aac_driver_ident aac_drivers[] = {
+ { 0x1028, 0x0001, 0x1028, 0x0001, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 2/Si */
+ { 0x1028, 0x0002, 0x1028, 0x0002, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x0003, 0x1028, 0x0003, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Si */
+ { 0x1028, 0x0004, 0x1028, 0x00d0, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Si */
+ { 0x1028, 0x0002, 0x1028, 0x00d1, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x0002, 0x1028, 0x00d9, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x000a, 0x1028, 0x0106, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x000a, 0x1028, 0x011b, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x000a, 0x1028, 0x0121, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x9005, 0x0283, 0x9005, 0x0283, aac_rx_init, "aacraid", "ADAPTEC ", "catapult ", 2 }, /* catapult*/
+ { 0x9005, 0x0284, 0x9005, 0x0284, aac_rx_init, "aacraid", "ADAPTEC ", "tomcat ", 2 }, /* tomcat*/
+ { 0x9005, 0x0285, 0x9005, 0x0286, aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2120S ", 1 }, /* Adaptec 2120S (Crusader)*/
+ { 0x9005, 0x0285, 0x9005, 0x0285, aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2 }, /* Adaptec 2200S (Vulcan)*/
+ { 0x9005, 0x0285, 0x9005, 0x0287, aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2 }, /* Adaptec 2200S (Vulcan-2m)*/
+ { 0x9005, 0x0285, 0x1028, 0x0287, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* Dell PERC 320/DC */
+ { 0x1011, 0x0046, 0x9005, 0x0365, aac_sa_init, "aacraid", "ADAPTEC ", "Adaptec 5400S ", 4 }, /* Adaptec 5400S (Mustang)*/
+ { 0x1011, 0x0046, 0x9005, 0x0364, aac_sa_init, "aacraid", "ADAPTEC ", "AAC-364 ", 4 }, /* Adaptec 5400S (Mustang)*/
+ { 0x1011, 0x0046, 0x9005, 0x1364, aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4 }, /* Dell PERC2 "Quad Channel" */
+ { 0x1011, 0x0046, 0x103c, 0x10c2, aac_sa_init, "hpnraid", "HP ", "NetRAID-4M ", 4 } /* HP NetRAID-4M */
+};
+
+#define NUM_AACTYPES (sizeof(aac_drivers) / sizeof(struct aac_driver_ident))
+static int num_aacdrivers = NUM_AACTYPES;
+
+#if 0
+static int aac_cfg_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg);
+static int aac_cfg_open(struct inode * inode, struct file * file);
+static int aac_cfg_release(struct inode * inode,struct file * file);
+
+static struct file_operations aac_cfg_fops = {
+/* owner: THIS_MODULE, */
+ ioctl: aac_cfg_ioctl,
+ open: aac_cfg_open,
+ release: aac_cfg_release
+};
+#endif
+
+static int aac_detect(Scsi_Host_Template *);
+static int aac_release(struct Scsi_Host *);
+static int aac_queuecommand(Scsi_Cmnd *, void (*CompletionRoutine)(Scsi_Cmnd *));
+static int aac_biosparm(Scsi_Disk *, kdev_t, int *);
+#ifdef CONFIG_PROC_FS
+static int aac_procinfo(char *, char **, off_t, int, int, int);
+#endif
+static int aac_ioctl(Scsi_Device *, int, void *);
+static int aac_eh_abort(Scsi_Cmnd * cmd);
+static int aac_eh_device_reset(Scsi_Cmnd* cmd);
+static int aac_eh_bus_reset(Scsi_Cmnd* cmd);
+static int aac_eh_reset(Scsi_Cmnd* cmd);
+
+static void aac_queuedepth(struct Scsi_Host *, Scsi_Device *);
+
+/**
+ * aac_detect - Probe for aacraid cards
+ * @template: SCSI driver template
+ *
+ * Probe for AAC Host Adapters initialize, register, and report the
+ * configuration of each AAC Host Adapter found.
+ * Returns the number of adapters successfully initialized and
+ * registered.
+ * Initializes all data necessary for this particular SCSI driver.
+ * Notes:
+ * The detect routine must not call any of the mid level functions
+ * to queue commands because things are not guaranteed to be set
+ * up yet. The detect routine can send commands to the host adapter
+ * as long as the program control will not be passed to scsi.c in
+ * the processing of the command. Note especially that
+ * scsi_malloc/scsi_free must not be called.
+ *
+ */
+static int aac_detect(Scsi_Host_Template *template)
+{
+ int index;
+ int container;
+ u16 vendor_id, device_id;
+ struct Scsi_Host *host_ptr;
+ struct pci_dev *dev = NULL;
+ struct aac_dev *aac;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+ char *name = NULL;
+
+ printk(KERN_INFO "Red Hat/Adaptec aacraid driver, %s\n",
+ AAC_DRIVER_BUILD_DATE);
+
+
+ /*
+ ** XXX SMH: we need to take interrupts during detect, but the SCSI
+ ** layer is holding this lock with interrupts disabled. I don't
+ ** know how this works on vanilla linux (we 'down' on a semaphore
+ ** at one point during the process -- how do we wake?)
+ */
+ spin_unlock_irq(&io_request_lock);
+
+
+ /* setting up the proc directory structure */
+ template->proc_name = "aacraid";
+
+ for( index = 0; index != num_aacdrivers; index++ )
+ {
+ device_id = aac_drivers[index].device;
+ vendor_id = aac_drivers[index].vendor;
+ name = aac_drivers[index].name;
+ dprintk((KERN_DEBUG "Checking %s %x/%x/%x/%x.\n",
+ name, vendor_id, device_id,
+ aac_drivers[index].subsystem_vendor,
+ aac_drivers[index].subsystem_device));
+
+ dev = NULL;
+ while((dev = pci_find_device(vendor_id, device_id, dev))) {
+ if (pci_enable_device(dev))
+ continue;
+ pci_set_master(dev);
+ pci_set_dma_mask(dev, 0xFFFFFFFFULL);
+
+ if((dev->subsystem_vendor != aac_drivers[index].subsystem_vendor) ||
+ (dev->subsystem_device != aac_drivers[index].subsystem_device))
+ continue;
+
+ dprintk((KERN_DEBUG "%s device detected.\n", name));
+ dprintk((KERN_DEBUG "%x/%x/%x/%x.\n", vendor_id, device_id,
+ aac_drivers[index].subsystem_vendor,
+ aac_drivers[index].subsystem_device));
+ /* Increment the host adapter count */
+ aac_count++;
+ /*
+ * scsi_register() allocates memory for a Scsi_Hosts
+ * structure and links it into the linked list of host
+ * adapters. This linked list contains the data for all
+ * possible <supported> scsi hosts. This is similar to
+ * the Scsi_Host_Template, except that we have one entry
+ * for each actual physical host adapter on the system,
+ * stored as a linked list. If there are two AAC boards,
+ * then we will need to make two Scsi_Host entries, but
+ * there will be only one Scsi_Host_Template entry. The
+ * second argument to scsi_register() specifies the size
+ * of the extra memory we want to hold any device specific
+ * information. */
+ host_ptr = scsi_register( template, sizeof(struct aac_dev) );
+ /*
+ * These three parameters can be used to allow for wide SCSI
+ * and for host adapters that support multiple buses.
+ */
+ host_ptr->max_id = 17;
+ host_ptr->max_lun = 8;
+ host_ptr->max_channel = 1;
+ host_ptr->irq = dev->irq; /* Adapter IRQ number */
+ /* host_ptr->base = ( char * )(dev->resource[0].start & ~0xff); */
+ host_ptr->base = dev->resource[0].start;
+ scsi_set_pci_device(host_ptr, dev);
+ dprintk((KERN_DEBUG "Device base address = 0x%lx [0x%lx].\n",
+ host_ptr->base, dev->resource[0].start));
+ dprintk((KERN_DEBUG "Device irq = 0x%x.\n", dev->irq));
+ /*
+ * The unique_id field is a unique identifier that must
+ * be assigned so that we have some way of identifying
+ * each host adapter properly and uniquely. For hosts
+ * that do not support more than one card in the
+ * system, this does not need to be set. It is
+ * initialized to zero in scsi_register(). This is the
+ * value returned as aac->id.
+ */
+ host_ptr->unique_id = aac_count - 1;
+ /*
+ * This function is called after the device list has
+ * been built to find the tagged queueing depth
+ * supported for each device.
+ */
+ host_ptr->select_queue_depths = aac_queuedepth;
+ aac = (struct aac_dev *)host_ptr->hostdata;
+ /* attach a pointer back to Scsi_Host */
+ aac->scsi_host_ptr = host_ptr;
+ aac->pdev = dev;
+ aac->cardtype = index;
+ aac->name = aac->scsi_host_ptr->hostt->name;
+ aac->id = aac->scsi_host_ptr->unique_id;
+ /* Initialize the ordinal number of the device to -1 */
+ fsa_dev_ptr = &(aac->fsa_dev);
+ for( container=0; container < MAXIMUM_NUM_CONTAINERS; container++)
+ fsa_dev_ptr->devno[container] = -1;
+
+ dprintk((KERN_DEBUG "Initializing Hardware...\n"));
+
+ if((*aac_drivers[index].init)(aac , host_ptr->unique_id) != 0)
+ {
+ /* device initialization failed */
+ printk(KERN_WARNING
+ "aacraid: device initialization failed.\n");
+ scsi_unregister(host_ptr);
+ aac_count--;
+ continue;
+ }
+ dprintk((KERN_DEBUG "%s:%d device initialization successful.\n",
+ name, host_ptr->unique_id));
+ aac_get_adapter_info(aac);
+
+ dprintk((KERN_DEBUG "%s got adapter info.\n", name));
+
+ if(nondasd != -1)
+ {
+ /* someone told us how to set this on the cmdline */
+ aac->nondasd_support = (nondasd!=0);
+ }
+ if(aac->nondasd_support != 0){
+ printk(KERN_INFO "%s%d: Non-DASD support enabled\n",
+ aac->name, aac->id);
+ }
+ dprintk((KERN_DEBUG "%s:%d options flag %04x.\n", name,
+ host_ptr->unique_id, aac->adapter_info.options));
+ if(aac->nondasd_support == 1)
+ {
+ /*
+ * max_channel is the number of physical
+ * channels plus one virtual channel; all
+ * containers live on virtual channel 0,
+ * and physical channels are addressed by
+ * their actual physical number plus one.
+ */
+ host_ptr->max_channel = aac_drivers[index].channels+1;
+ } else {
+ host_ptr->max_channel = 1;
+ }
+ dprintk((KERN_DEBUG "Device has %d logical channels\n",
+ host_ptr->max_channel));
+ aac_get_containers(aac);
+ aac_devices[aac_count-1] = aac;
+
+ /*
+ * dmb - we may need to move these 3 parms somewhere else once
+ * we get a fib that can report the actual numbers
+ */
+ host_ptr->max_id = AAC_MAX_TARGET;
+ host_ptr->max_lun = AAC_MAX_LUN;
+
+ /*
+ * If we are PAE capable then our future DMA mappings
+ * (for read/write commands) are 64bit clean and don't
+ * need bouncing. This assumes we do no other 32bit only
+ * allocations (eg fib table expands) after this point.
+ */
+
+ if(aac->pae_support)
+ pci_set_dma_mask(dev, 0xFFFFFFFFFFFFFFFFUL);
+ }
+ }
+
+ /* XXX SMH: restore lock and IPL for SCSI layer */
+ spin_lock_irq(&io_request_lock);
+
+
+#if 0
+ if( aac_count ){
+ if((aac_cfg_major = register_chrdev( 0, "aac", &aac_cfg_fops))<0)
+ printk(KERN_WARNING "aacraid: unable to register 'aac' device.\n");
+ }
+#endif
+
+ template->present = aac_count; /* # of cards of this type found */
+ printk(KERN_DEBUG "aac_detect: returning %d\n", aac_count);
+ return aac_count;
+}
+
+/**
+ * aac_release - release SCSI host resources
+ * @host_ptr: SCSI host to clean up
+ *
+ * Release all resources previously acquired to support a specific Host
+ * Adapter and unregister the AAC Host Adapter.
+ *
+ * BUGS: Does not wait for the thread it kills to die.
+ */
+
+static int aac_release(struct Scsi_Host *host_ptr)
+{
+ struct aac_dev *dev;
+ dprintk((KERN_DEBUG "aac_release.\n"));
+ dev = (struct aac_dev *)host_ptr->hostdata;
+
+#if 0
+ /*
+ * kill any threads we started
+ */
+ kill_proc(dev->thread_pid, SIGKILL, 0);
+ wait_for_completion(&dev->aif_completion);
+#endif
+ /*
+ * Call the comm layer to detach from this adapter
+ */
+ aac_detach(dev);
+ /* Check free orderings... */
+ /* remove interrupt binding */
+ free_irq(host_ptr->irq, dev);
+ iounmap((void * )dev->regs.sa);
+ /* unregister adapter */
+ scsi_unregister(host_ptr);
+ /*
+ * FIXME: This assumes no hot plugging is going on...
+ */
+ if( aac_cfg_major >= 0 )
+ {
+#if 0
+ unregister_chrdev(aac_cfg_major, "aac");
+#endif
+ aac_cfg_major = -1;
+ }
+ return 0;
+}
+
+/**
+ * aac_queuecommand - queue a SCSI command
+ * @scsi_cmnd_ptr: SCSI command to queue
+ * @CompletionRoutine: Function to call on command completion
+ *
+ * Queues a command for execution by the associated Host Adapter.
+ */
+
+static int aac_queuecommand(Scsi_Cmnd *scsi_cmnd_ptr, void (*complete)(Scsi_Cmnd *))
+{
+ int ret;
+
+ scsi_cmnd_ptr->scsi_done = complete;
+ /*
+ * aac_scsi_cmd() handles command processing, setting the
+ * result code and calling completion routine.
+ */
+ if((ret = aac_scsi_cmd(scsi_cmnd_ptr)) != 0)
+ dprintk((KERN_DEBUG "aac_scsi_cmd failed.\n"));
+ return ret;
+}
+
+/**
+ * aac_driverinfo - Returns the host adapter name
+ * @host_ptr: Scsi host to report on
+ *
+ * Returns a static string describing the device in question
+ */
+
+const char *aac_driverinfo(struct Scsi_Host *host_ptr)
+{
+ struct aac_dev *dev = (struct aac_dev *)host_ptr->hostdata;
+ return aac_drivers[dev->cardtype].name;
+}
+
+/**
+ * aac_get_driver_ident
+ * @devtype: index into lookup table
+ *
+ * Returns a pointer to the entry in the driver lookup table.
+ */
+struct aac_driver_ident* aac_get_driver_ident(int devtype)
+{
+ return &aac_drivers[devtype];
+}
+
+/**
+ * aac_biosparm - return BIOS parameters for disk
+ * @disk: SCSI disk object to process
+ * @device: kdev_t of the disk in question
+ * @geom: geometry block to fill in
+ *
+ * Return the Heads/Sectors/Cylinders BIOS Disk Parameters for Disk.
+ * The default disk geometry is 64 heads, 32 sectors, and the appropriate
+ * number of cylinders so as not to exceed drive capacity. In order for
+ * disks equal to or larger than 1 GB to be addressable by the BIOS
+ * without exceeding the BIOS limitation of 1024 cylinders, Extended
+ * Translation should be enabled. With Extended Translation enabled,
+ * drives between 1 GB inclusive and 2 GB exclusive are given a disk
+ * geometry of 128 heads and 32 sectors, and drives above 2 GB inclusive
+ * are given a disk geometry of 255 heads and 63 sectors. However, if
+ * the BIOS detects that the Extended Translation setting does not match
+ * the geometry in the partition table, then the translation inferred
+ * from the partition table will be used by the BIOS, and a warning may
+ * be displayed.
+ */
+
+static int aac_biosparm(Scsi_Disk *disk, kdev_t dev, int *geom)
+{
+ struct diskparm *param = (struct diskparm *)geom;
+ struct buffer_head * buf = NULL;
+
+ dprintk((KERN_DEBUG "aac_biosparm.\n"));
+
+ /*
+ * Assuming extended translation is enabled - #REVISIT#
+ */
+ if( disk->capacity >= 2 * 1024 * 1024 ) /* 1 GB in 512 byte sectors */
+ {
+ if( disk->capacity >= 4 * 1024 * 1024 ) /* 2 GB in 512 byte sectors */
+ {
+ param->heads = 255;
+ param->sectors = 63;
+ }
+ else
+ {
+ param->heads = 128;
+ param->sectors = 32;
+ }
+ }
+ else
+ {
+ param->heads = 64;
+ param->sectors = 32;
+ }
+
+ param->cylinders = disk->capacity/(param->heads * param->sectors);
+
+#if 0
+ /*
+ * Read the first 1024 bytes from the disk device
+ */
+
+ buf = bread(MKDEV(MAJOR(dev), MINOR(dev)&~0xf), 0, block_size(dev));
+ if(buf == NULL)
+ return 0;
+ /*
+ * If the boot sector partition table is valid, search for a partition
+ * table entry whose end_head matches one of the standard geometry
+ * translations ( 64/32, 128/32, 255/63 ).
+ */
+#endif
+
+
+ /*
+ * NB: with bread() compiled out above, buf stays NULL and the
+ * partition-table inspection below is skipped; the guard avoids
+ * dereferencing an uninitialised pointer.
+ */
+ if(buf != NULL &&
+ *(unsigned short *)(buf->b_data + 0x1fe) == cpu_to_le16(0xaa55))
+ {
+ struct partition *first = (struct partition * )(buf->b_data + 0x1be);
+ struct partition *entry = first;
+ int saved_cylinders = param->cylinders;
+ int num;
+ unsigned char end_head, end_sec;
+
+ for(num = 0; num < 4; num++)
+ {
+ end_head = entry->end_head;
+ end_sec = entry->end_sector & 0x3f;
+
+ if(end_head == 63)
+ {
+ param->heads = 64;
+ param->sectors = 32;
+ break;
+ }
+ else if(end_head == 127)
+ {
+ param->heads = 128;
+ param->sectors = 32;
+ break;
+ }
+ else if(end_head == 254)
+ {
+ param->heads = 255;
+ param->sectors = 63;
+ break;
+ }
+ entry++;
+ }
+
+ if(num == 4)
+ {
+ end_head = first->end_head;
+ end_sec = first->end_sector & 0x3f;
+ }
+
+ param->cylinders = disk->capacity / (param->heads * param->sectors);
+
+ if(num < 4 && end_sec == param->sectors)
+ {
+ if(param->cylinders != saved_cylinders)
+ dprintk((KERN_DEBUG "Adopting geometry: heads=%d, "
+ "sectors=%d from partition table %d.\n",
+ param->heads, param->sectors, num));
+ }
+ else if(end_head > 0 || end_sec > 0)
+ {
+ dprintk((KERN_DEBUG "Strange geometry: heads=%d, "
+ "sectors=%d in partition table %d.\n",
+ end_head + 1, end_sec, num));
+ dprintk((KERN_DEBUG "Using geometry: heads=%d, sectors=%d.\n",
+ param->heads, param->sectors));
+ }
+ }
+#if 0
+ brelse(buf);
+#endif
+ return 0;
+}
+
+/**
+ * aac_queuedepth - compute queue depths
+ * @host: SCSI host in question
+ * @dev: SCSI device we are considering
+ *
+ * Selects queue depths for each target device based on the host adapter's
+ * total capacity and the queue depth supported by the target device.
+ * A queue depth of one automatically disables tagged queueing.
+ */
+
+static void aac_queuedepth(struct Scsi_Host * host, Scsi_Device * dev )
+{
+ Scsi_Device * dptr;
+
+ dprintk((KERN_DEBUG "aac_queuedepth.\n"));
+ dprintk((KERN_DEBUG "Device # Q Depth Online\n"));
+ dprintk((KERN_DEBUG "---------------------------\n"));
+ for(dptr = dev; dptr != NULL; dptr = dptr->next)
+ {
+ if(dptr->host == host)
+ {
+ dptr->queue_depth = 10;
+ dprintk((KERN_DEBUG " %2d %d %d\n",
+ dptr->id, dptr->queue_depth, dptr->online));
+ }
+ }
+}
+
+
+/**
+ * aac_eh_abort - Abort command if possible.
+ * @cmd: SCSI command block to abort
+ *
+ * Called when the midlayer wishes to abort a command. We don't support
+ * this facility, and our firmware looks after life for us. We just
+ * report this as failing
+ */
+
+static int aac_eh_abort(Scsi_Cmnd *cmd)
+{
+ return FAILED;
+}
+
+/**
+ * aac_eh_device_reset - Reset command handling
+ * @cmd: SCSI command block causing the reset
+ *
+ * Issue a reset of a SCSI device. We are ourselves not truly a SCSI
+ * controller and our firmware will do the work for us anyway. Thus this
+ * is a no-op. We just return FAILED.
+ */
+
+static int aac_eh_device_reset(Scsi_Cmnd *cmd)
+{
+ return FAILED;
+}
+
+/**
+ * aac_eh_bus_reset - Reset command handling
+ * @cmd: SCSI command block causing the reset
+ *
+ * Issue a reset of a SCSI bus. We are ourselves not truly a SCSI
+ * controller and our firmware will do the work for us anyway. Thus this
+ * is a no-op. We just return FAILED.
+ */
+
+static int aac_eh_bus_reset(Scsi_Cmnd* cmd)
+{
+ return FAILED;
+}
+
+/**
+ * aac_eh_reset - Reset command handling
+ * @cmd: SCSI command block causing the reset
+ *
+ * Issue a reset of a SCSI host. If things get this bad then arguably we should
+ * go take a look at what the host adapter is doing and see if something really
+ * broke (as can occur at least on my Dell QC card if a drive keeps failing spinup)
+ */
+
+static int aac_eh_reset(Scsi_Cmnd* cmd)
+{
+ printk(KERN_ERR "aacraid: Host adapter reset request. SCSI hang ?\n");
+ return FAILED;
+}
+
+/**
+ * aac_ioctl - Handle SCSI ioctls
+ * @scsi_dev_ptr: scsi device to operate upon
+ * @cmd: ioctl command to issue
+ * @arg: ioctl data pointer
+ *
+ * Issue an ioctl on an aacraid device. Returns a standard unix error code or
+ * zero for success
+ */
+
+static int aac_ioctl(Scsi_Device * scsi_dev_ptr, int cmd, void * arg)
+{
+ struct aac_dev *dev;
+ dprintk((KERN_DEBUG "aac_ioctl.\n"));
+ dev = (struct aac_dev *)scsi_dev_ptr->host->hostdata;
+ return aac_do_ioctl(dev, cmd, arg);
+}
+
+/**
+ * aac_cfg_open - open a configuration file
+ * @inode: inode being opened
+ * @file: file handle attached
+ *
+ * Called when the configuration device is opened. Does the needed
+ * set up on the handle and then returns
+ *
+ * Bugs: This needs extending to check a given adapter is present
+ * so we can support hot plugging, and to ref count adapters.
+ */
+
+static int aac_cfg_open(struct inode * inode, struct file * file )
+{
+ unsigned minor_number = MINOR(inode->i_rdev);
+ if(minor_number >= aac_count)
+ return -ENODEV;
+ return 0;
+}
+
+/**
+ * aac_cfg_release - close down an AAC config device
+ * @inode: inode of configuration file
+ * @file: file handle of configuration file
+ *
+ * Called when the last close of the configuration file handle
+ * is performed.
+ */
+
+static int aac_cfg_release(struct inode * inode, struct file * file )
+{
+ return 0;
+}
+
+/**
+ * aac_cfg_ioctl - AAC configuration request
+ * @inode: inode of device
+ * @file: file handle
+ * @cmd: ioctl command code
+ * @arg: argument
+ *
+ * Handles a configuration ioctl. Currently this involves wrapping it
+ * up and feeding it into the nasty windowsalike glue layer.
+ *
+ * Bugs: Needs locking against parallel ioctls lower down
+ * Bugs: Needs to handle hot plugging
+ */
+
+static int aac_cfg_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg )
+{
+ struct aac_dev *dev = aac_devices[MINOR(inode->i_rdev)];
+ return aac_do_ioctl(dev, cmd, (void *)arg);
+}
+
+/*
+ * To use the low level SCSI driver support using the linux kernel loadable
+ * module interface we should initialize the global variable driver_interface
+ * (datatype Scsi_Host_Template) and then include the file scsi_module.c.
+ */
+
+static Scsi_Host_Template driver_template = {
+/* module: THIS_MODULE, */
+ name: "AAC",
+/* proc_info: aac_procinfo, */
+ detect: aac_detect,
+ release: aac_release,
+ info: aac_driverinfo,
+ ioctl: aac_ioctl,
+ queuecommand: aac_queuecommand,
+ bios_param: aac_biosparm,
+ can_queue: AAC_NUM_IO_FIB,
+ this_id: 16,
+ sg_tablesize: 16,
+ max_sectors: 128,
+ cmd_per_lun: AAC_NUM_IO_FIB,
+ eh_abort_handler: aac_eh_abort,
+ eh_device_reset_handler:aac_eh_device_reset,
+ eh_bus_reset_handler: aac_eh_bus_reset,
+ eh_host_reset_handler: aac_eh_reset,
+ use_new_eh_code: 1,
+
+ use_clustering: ENABLE_CLUSTERING,
+};
+
+#include "../scsi_module.c.inc"
+
+#ifdef CONFIG_PROC_FS
+/**
+ * aac_procinfo - Implement /proc/scsi/<drivername>/<n>
+ * @proc_buffer: memory buffer for I/O
+ * @start_ptr: pointer to first valid data
+ * @offset: offset into file
+ * @bytes_available: space left
+ * @host_no: scsi host ident
+ * @write: direction of I/O
+ *
+ * Used to export driver statistics and other information to the world outside
+ * the kernel using the proc file system. Also provides an interface to
+ * feed the driver with information.
+ *
+ * For reads
+ * - if offset > 0 return 0
+ * - if offset == 0 write data to proc_buffer and set the start_ptr to
+ * beginning of proc_buffer, return the number of characters written.
+ * For writes
+ * - writes currently not supported, return 0
+ *
+ * Bugs: Only offset zero is handled
+ */
+
+static int aac_procinfo(char *proc_buffer, char **start_ptr,off_t offset,
+ int bytes_available, int host_no, int write)
+{
+ if(write || offset > 0)
+ return 0;
+ *start_ptr = proc_buffer;
+ return sprintf(proc_buffer, "%s %d\n",
+ "Raid Controller, scsi hba number", host_no);
+}
+#endif
+
+EXPORT_NO_SYMBOLS;
diff --git a/xen/drivers/scsi/aacraid/rx.c b/xen/drivers/scsi/aacraid/rx.c
new file mode 100644
index 0000000000..e79ad49e74
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/rx.c
@@ -0,0 +1,457 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * rx.c
+ *
+ * Abstract: Hardware miniport for Drawbridge specific hardware functions.
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/kernel.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+#include <xeno/blk.h>
+#include <xeno/delay.h>
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+static void aac_rx_intr(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct aac_dev *dev = dev_id;
+ unsigned long bellbits;
+ u8 intstat, mask;
+ intstat = rx_readb(dev, MUnit.OISR);
+ /*
+ * Read mask and invert because drawbridge is reversed.
+ * This allows us to only service interrupts that have
+ * been enabled.
+ */
+ mask = ~(rx_readb(dev, MUnit.OIMR));
+ /* Check to see if this is our interrupt. If it isn't just return */
+
+ if (intstat & mask)
+ {
+ bellbits = rx_readl(dev, OutboundDoorbellReg);
+ if (bellbits & DoorBellPrintfReady) {
+ aac_printf(dev, le32_to_cpu(rx_readl (dev, IndexRegs.Mailbox[5])));
+ rx_writel(dev, MUnit.ODR,DoorBellPrintfReady);
+ rx_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
+ }
+ else if (bellbits & DoorBellAdapterNormCmdReady) {
+ aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
+ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
+ }
+ else if (bellbits & DoorBellAdapterNormRespReady) {
+ aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
+ rx_writel(dev, MUnit.ODR,DoorBellAdapterNormRespReady);
+ }
+ else if (bellbits & DoorBellAdapterNormCmdNotFull) {
+ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
+ }
+ else if (bellbits & DoorBellAdapterNormRespNotFull) {
+ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
+ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormRespNotFull);
+ }
+ }
+}
+
+/**
+ * aac_rx_enable_interrupt - Enable event reporting
+ * @dev: Adapter
+ * @event: Event to enable
+ *
+ * Enable event reporting from the i960 for a given event.
+ */
+
+static void aac_rx_enable_interrupt(struct aac_dev * dev, u32 event)
+{
+ switch (event) {
+
+ case HostNormCmdQue:
+ dev->irq_mask &= ~(OUTBOUNDDOORBELL_1);
+ break;
+
+ case HostNormRespQue:
+ dev->irq_mask &= ~(OUTBOUNDDOORBELL_2);
+ break;
+
+ case AdapNormCmdNotFull:
+ dev->irq_mask &= ~(OUTBOUNDDOORBELL_3);
+ break;
+
+ case AdapNormRespNotFull:
+ dev->irq_mask &= ~(OUTBOUNDDOORBELL_4);
+ break;
+ }
+}
+
+/**
+ * aac_rx_disable_interrupt - Disable event reporting
+ * @dev: Adapter
+ * @event: Event to disable
+ *
+ * Disable event reporting from the i960 for a given event.
+ */
+
+static void aac_rx_disable_interrupt(struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case HostNormCmdQue:
+ dev->irq_mask |= (OUTBOUNDDOORBELL_1);
+ break;
+
+ case HostNormRespQue:
+ dev->irq_mask |= (OUTBOUNDDOORBELL_2);
+ break;
+
+ case AdapNormCmdNotFull:
+ dev->irq_mask |= (OUTBOUNDDOORBELL_3);
+ break;
+
+ case AdapNormRespNotFull:
+ dev->irq_mask |= (OUTBOUNDDOORBELL_4);
+ break;
+ }
+}
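+
+/*
+ * Note that the two helpers above only update the software mask in
+ * dev->irq_mask; neither touches the hardware OIMR register, which is
+ * programmed in aac_rx_start_adapter() and rx_sync_cmd() below.
+ */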
+
+/**
+ * rx_sync_cmd - send a command and wait
+ * @dev: Adapter
+ * @command: Command to execute
+ * @p1: first parameter
+ * @status: adapter status
+ *
+ * This routine will send a synchronous command to the adapter and wait
+ * for its completion.
+ */
+
+static int rx_sync_cmd(struct aac_dev *dev, u32 command, u32 p1, u32 *status)
+{
+ unsigned long start;
+ int ok;
+ /*
+ * Write the command into Mailbox 0
+ */
+ rx_writel(dev, InboundMailbox0, cpu_to_le32(command));
+ /*
+ * Write the parameters into Mailboxes 1 - 4
+ */
+ rx_writel(dev, InboundMailbox1, cpu_to_le32(p1));
+ rx_writel(dev, InboundMailbox2, 0);
+ rx_writel(dev, InboundMailbox3, 0);
+ rx_writel(dev, InboundMailbox4, 0);
+ /*
+ * Clear the synch command doorbell to start on a clean slate.
+ */
+ rx_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
+ /*
+ * Disable doorbell interrupts
+ */
+ rx_writeb(dev, MUnit.OIMR, rx_readb(dev, MUnit.OIMR) | 0x04);
+ /*
+ * Force the completion of the mask register write before issuing
+ * the interrupt.
+ */
+ rx_readb (dev, MUnit.OIMR);
+ /*
+ * Signal that there is a new synch command
+ */
+ rx_writel(dev, InboundDoorbellReg, INBOUNDDOORBELL_0);
+
+ ok = 0;
+ start = jiffies;
+
+ /*
+ * Wait up to 30 seconds
+ */
+ while (time_before(jiffies, start+30*HZ))
+ {
+ /* Delay 5 microseconds to let Mon960 get info. */
+ udelay(5);
+ /*
+ * Mon960 will set doorbell0 bit when its completed the command.
+ */
+ if (rx_readl(dev, OutboundDoorbellReg) & OUTBOUNDDOORBELL_0) {
+ /*
+ * Clear the doorbell.
+ */
+ rx_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
+ ok = 1;
+ break;
+ }
+#if 0
+ /*
+ * Yield the processor in case we are slow
+ */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+#else
+ /* XXX SMH: not in xen we don't */
+ mdelay(50);
+#endif
+
+ }
+ if (ok != 1) {
+ /*
+ * Restore interrupt mask even though we timed out
+ */
+ rx_writeb(dev, MUnit.OIMR, rx_readb(dev, MUnit.OIMR) & 0xfb);
+ return -ETIMEDOUT;
+ }
+ /*
+ * Pull the synch status from Mailbox 0.
+ */
+ *status = le32_to_cpu(rx_readl(dev, IndexRegs.Mailbox[0]));
+ /*
+ * Clear the synch command doorbell.
+ */
+ rx_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
+ /*
+ * Restore interrupt mask
+ */
+ rx_writeb(dev, MUnit.OIMR, rx_readb(dev, MUnit.OIMR) & 0xfb);
+ return 0;
+
+}
+
+/**
+ * aac_rx_interrupt_adapter - interrupt adapter
+ * @dev: Adapter
+ *
+ * Send an interrupt to the i960 and breakpoint it.
+ */
+
+static void aac_rx_interrupt_adapter(struct aac_dev *dev)
+{
+ u32 ret;
+ rx_sync_cmd(dev, BREAKPOINT_REQUEST, 0, &ret);
+}
+
+/**
+ * aac_rx_notify_adapter - send an event to the adapter
+ * @dev: Adapter
+ * @event: Event to send
+ *
+ * Notify the i960 that something it probably cares about has
+ * happened.
+ */
+
+static void aac_rx_notify_adapter(struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case AdapNormCmdQue:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_1);
+ break;
+ case HostNormRespNotFull:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_4);
+ break;
+ case AdapNormRespQue:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_2);
+ break;
+ case HostNormCmdNotFull:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3);
+ break;
+ case HostShutdown:
+// rx_sync_cmd(dev, HOST_CRASHING, 0, &ret);
+ break;
+ case FastIo:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6);
+ break;
+ case AdapPrintfDone:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_5);
+ break;
+ default:
+ BUG();
+ break;
+ }
+}
+
+/**
+ * aac_rx_start_adapter - activate adapter
+ * @dev: Adapter
+ *
+ * Start up processing on an i960 based AAC adapter
+ */
+
+static void aac_rx_start_adapter(struct aac_dev *dev)
+{
+ u32 status;
+ struct aac_init *init;
+
+ init = dev->init;
+ printk("aac_rx_start: dev is %p, init is %p\n", dev, init);
+ init->HostElapsedSeconds = cpu_to_le32(jiffies/HZ);
+ /*
+ * Tell the adapter we are back and up and running so it will scan
+ * its command queues and enable our interrupts
+ */
+ dev->irq_mask = (DoorBellPrintfReady | OUTBOUNDDOORBELL_1 |
+ OUTBOUNDDOORBELL_2 | OUTBOUNDDOORBELL_3 |
+ OUTBOUNDDOORBELL_4);
+ /*
+ * First clear out all interrupts. Then enable the ones that we
+ * can handle.
+ */
+ rx_writeb(dev, MUnit.OIMR, 0xff);
+ rx_writel(dev, MUnit.ODR, 0xffffffff);
+// rx_writeb(dev, MUnit.OIMR, ~(u8)OUTBOUND_DOORBELL_INTERRUPT_MASK);
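+ /*
+ * 0xfb == ~0x04: everything stays masked except the doorbell
+ * interrupt, the same 0x04 bit rx_sync_cmd() toggles.
+ */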
+ rx_writeb(dev, MUnit.OIMR, 0xfb);
+
+ // We can only use a 32 bit address here
+ rx_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS,
+ (u32)(ulong)dev->init_pa, &status);
+}
+
+/**
+ * aac_rx_init - initialize an i960 based AAC card
+ * @dev: device to configure
+ * @num: adapter number
+ *
+ * Allocate and set up resources for the i960 based AAC variants. The
+ * device_interface in the commregion will be allocated and linked
+ * to the comm region.
+ */
+
+int aac_rx_init(struct aac_dev *dev, unsigned long num)
+{
+ unsigned long start;
+ unsigned long status;
+ int instance;
+ const char * name;
+
+ dev->devnum = num;
+ instance = dev->id;
+ name = dev->name;
+
+ dprintk((KERN_ERR "aac_rx_init called, num %ld, scsi host ptr = %p\n",
+ num, (void *)(dev->scsi_host_ptr)));
+
+ dprintk((KERN_ERR "scsi_host_ptr->base is %p\n",
+ (void *)dev->scsi_host_ptr->base));
+ /*
+ * Map in the registers from the adapter.
+ */
+ if((dev->regs.rx = (struct rx_registers *)
+ ioremap((unsigned long)dev->scsi_host_ptr->base, 8192))==NULL)
+ {
+ printk(KERN_WARNING "aacraid: unable to map i960.\n" );
+ return -1;
+ }
+
+// dprintk((KERN_ERR "aac_rx_init: AAA\n"));
+ /*
+ * Check to see if the board failed any self tests.
+ */
+ if (rx_readl(dev, IndexRegs.Mailbox[7]) & SELF_TEST_FAILED) {
+ printk(KERN_ERR "%s%d: adapter self-test failed.\n",
+ dev->name, instance);
+ return -1;
+ }
+
+
+// dprintk((KERN_ERR "aac_rx_init: BBB\n"));
+ /*
+ * Check to see if the board panic'd while booting.
+ */
+ if (rx_readl(dev, IndexRegs.Mailbox[7]) & KERNEL_PANIC) {
+ printk(KERN_ERR "%s%d: adapter kernel panic'd.\n",
+ dev->name, instance);
+ return -1;
+ }
+ start = jiffies;
+
+// dprintk((KERN_ERR "aac_rx_init: DDD\n"));
+ /*
+ * Wait for the adapter to be up and running. Wait up to 3 minutes
+ */
+ while (!(rx_readl(dev, IndexRegs.Mailbox[7]) & KERNEL_UP_AND_RUNNING))
+ {
+ if(time_after(jiffies, start+180*HZ))
+ {
+ status = rx_readl(dev, IndexRegs.Mailbox[7]) >> 16;
+ printk(KERN_ERR "%s%d: adapter kernel failed to start,"
+ "init status = %ld.\n", dev->name,
+ instance, status);
+ return -1;
+ }
+// dprintk((KERN_ERR "aac_rx_init: XXX\n"));
+
+#if 0
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+#else
+ /* XXX SMH: no sleeping for us (we're the xen idle task) */
+ mdelay(50);
+#endif
+
+ }
+
+// dprintk((KERN_ERR "aac_rx_init: ZZZ!\n"));
+ if (request_irq(dev->scsi_host_ptr->irq, aac_rx_intr,
+ SA_SHIRQ|SA_INTERRUPT, "aacraid", (void *)dev) < 0)
+ {
+ printk(KERN_ERR "%s%d: Interrupt unavailable.\n",
+ name, instance);
+ return -1;
+ }
+ /*
+ * Fill in the function dispatch table.
+ */
+ dev->a_ops.adapter_interrupt = aac_rx_interrupt_adapter;
+ dev->a_ops.adapter_enable_int = aac_rx_enable_interrupt;
+ dev->a_ops.adapter_disable_int = aac_rx_disable_interrupt;
+ dev->a_ops.adapter_notify = aac_rx_notify_adapter;
+ dev->a_ops.adapter_sync_cmd = rx_sync_cmd;
+
+ if (aac_init_adapter(dev) == NULL)
+ return -1;
+#ifdef TRY_TASKLET
+ aac_command_tasklet.data = (unsigned long)dev;
+ tasklet_enable(&aac_command_tasklet);
+#else
+ /*
+ * Start any kernel threads needed
+ */
+ dev->thread_pid = kernel_thread((int (*)(void *))aac_command_thread,
+ dev, 0);
+#endif
+
+ /*
+ * Tell the adapter that all is configured, and it can start
+ * accepting requests
+ */
+ aac_rx_start_adapter(dev);
+ return 0;
+}
diff --git a/xen/drivers/scsi/aacraid/sa.c b/xen/drivers/scsi/aacraid/sa.c
new file mode 100644
index 0000000000..edb5679d41
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/sa.c
@@ -0,0 +1,406 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * sa.c
+ *
+ * Abstract: Drawbridge specific support functions
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/kernel.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+/* #include <xeno/pci.h> */
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+#include <xeno/blk.h>
+#include <xeno/delay.h>
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+static void aac_sa_intr(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct aac_dev *dev = dev_id;
+ unsigned short intstat, mask;
+
+ intstat = sa_readw(dev, DoorbellReg_p);
+ /*
+ * Read mask and invert because drawbridge is reversed.
+ * This allows us to only service interrupts that have been enabled.
+ */
+ mask = ~(sa_readw(dev, SaDbCSR.PRISETIRQMASK));
+
+ /* Check to see if this is our interrupt. If it isn't just return */
+
+ if (intstat & mask) {
+ if (intstat & PrintfReady) {
+ aac_printf(dev, le32_to_cpu(sa_readl(dev, Mailbox5)));
+ sa_writew(dev, DoorbellClrReg_p, PrintfReady); /* clear PrintfReady */
+ sa_writew(dev, DoorbellReg_s, PrintfDone);
+ } else if (intstat & DOORBELL_1) { // dev -> Host Normal Command Ready
+ aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_1);
+ } else if (intstat & DOORBELL_2) { // dev -> Host Normal Response Ready
+ aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_2);
+ } else if (intstat & DOORBELL_3) { // dev -> Host Normal Command Not Full
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_3);
+ } else if (intstat & DOORBELL_4) { // dev -> Host Normal Response Not Full
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_4);
+ }
+ }
+}
+
+/**
+ * aac_sa_enable_interrupt - enable an interrupt event
+ * @dev: Which adapter to enable.
+ * @event: Which adapter event.
+ *
+ * This routine will enable the corresponding adapter event to cause an interrupt on
+ * the host.
+ */
+
+void aac_sa_enable_interrupt(struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case HostNormCmdQue:
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_1);
+ break;
+
+ case HostNormRespQue:
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_2);
+ break;
+
+ case AdapNormCmdNotFull:
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_3);
+ break;
+
+ case AdapNormRespNotFull:
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_4);
+ break;
+ }
+}
+
+/**
+ * aac_sa_disable_interrupt - disable an interrupt event
+ * @dev: Which adapter to disable.
+ * @event: Which adapter event.
+ *
+ * This routine will disable the corresponding adapter event from causing
+ * an interrupt on the host.
+ */
+
+void aac_sa_disable_interrupt (struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case HostNormCmdQue:
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_1);
+ break;
+
+ case HostNormRespQue:
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_2);
+ break;
+
+ case AdapNormCmdNotFull:
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_3);
+ break;
+
+ case AdapNormRespNotFull:
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_4);
+ break;
+ }
+}
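+
+/*
+ * Unlike the rx variant, the sa enable/disable helpers program the
+ * hardware mask directly: writing a doorbell bit to PRISETIRQMASK masks
+ * (disables) it, and writing it to PRICLEARIRQMASK unmasks (enables) it.
+ */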
+
+/**
+ * aac_sa_notify_adapter - handle adapter notification
+ * @dev: Adapter that notification is for
+ * @event: Event to notify
+ *
+ * Notify the adapter of an event
+ */
+
+void aac_sa_notify_adapter(struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case AdapNormCmdQue:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_1);
+ break;
+ case HostNormRespNotFull:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_4);
+ break;
+ case AdapNormRespQue:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_2);
+ break;
+ case HostNormCmdNotFull:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_3);
+ break;
+ case HostShutdown:
+ //sa_sync_cmd(dev, HOST_CRASHING, 0, &ret);
+ break;
+ case FastIo:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_6);
+ break;
+ case AdapPrintfDone:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_5);
+ break;
+ default:
+ BUG();
+ break;
+ }
+}
+
+
+/**
+ * sa_sync_cmd - send a command and wait
+ * @dev: Adapter
+ * @command: Command to execute
+ * @p1: first parameter
+ * @ret: adapter status
+ *
+ * This routine will send a synchronous command to the adapter and wait
+ * for its completion.
+ */
+
+static int sa_sync_cmd(struct aac_dev *dev, u32 command, u32 p1, u32 *ret)
+{
+ unsigned long start;
+ int ok;
+ /*
+ * Write the Command into Mailbox 0
+ */
+ sa_writel(dev, Mailbox0, cpu_to_le32(command));
+ /*
+ * Write the parameters into Mailboxes 1 - 4
+ */
+ sa_writel(dev, Mailbox1, cpu_to_le32(p1));
+ sa_writel(dev, Mailbox2, 0);
+ sa_writel(dev, Mailbox3, 0);
+ sa_writel(dev, Mailbox4, 0);
+ /*
+ * Clear the synch command doorbell to start on a clean slate.
+ */
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_0);
+ /*
+ * Signal that there is a new synch command
+ */
+ sa_writew(dev, DoorbellReg_s, DOORBELL_0);
+
+ ok = 0;
+ start = jiffies;
+
+ while(time_before(jiffies, start+30*HZ))
+ {
+ /*
+ * Delay 5uS so that the monitor gets access
+ */
+ udelay(5);
+ /*
+ * Mon110 will set doorbell0 bit when it has
+ * completed the command.
+ */
+ if(sa_readw(dev, DoorbellReg_p) & DOORBELL_0) {
+ ok = 1;
+ break;
+ }
+#if 0
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+#endif
+ mdelay(100);
+
+ }
+
+ if (ok != 1)
+ return -ETIMEDOUT;
+ /*
+ * Clear the synch command doorbell.
+ */
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_0);
+ /*
+ * Pull the synch status from Mailbox 0.
+ */
+ *ret = le32_to_cpu(sa_readl(dev, Mailbox0));
+ return 0;
+}
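+
+/*
+ * sa_sync_cmd() mirrors rx_sync_cmd() in rx.c: post the command in
+ * Mailbox0-4, ring DOORBELL_0, then poll for up to 30 seconds for the
+ * adapter to ring it back. The sa variant polls DoorbellReg_p and does
+ * not mask/restore an interrupt mask around the exchange.
+ */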
+
+/**
+ * aac_sa_interrupt_adapter - interrupt an adapter
+ * @dev: Which adapter to enable.
+ *
+ * Breakpoint an adapter.
+ */
+
+static void aac_sa_interrupt_adapter (struct aac_dev *dev)
+{
+ u32 ret;
+ sa_sync_cmd(dev, BREAKPOINT_REQUEST, 0, &ret);
+}
+
+/**
+ * aac_sa_start_adapter - activate adapter
+ * @dev: Adapter
+ *
+ * Start up processing on an ARM based AAC adapter
+ */
+
+static void aac_sa_start_adapter(struct aac_dev *dev)
+{
+ u32 ret;
+ struct aac_init *init;
+ /*
+ * Fill in the remaining pieces of the init.
+ */
+ init = dev->init;
+ init->HostElapsedSeconds = cpu_to_le32(jiffies/HZ);
+
+ dprintk(("INIT\n"));
+ /*
+ * Tell the adapter we are back and up and running so it will scan its command
+ * queues and enable our interrupts
+ */
+ dev->irq_mask = (PrintfReady | DOORBELL_1 | DOORBELL_2 | DOORBELL_3 | DOORBELL_4);
+ /*
+ * First clear out all interrupts. Then enable the ones that
+ * we can handle.
+ */
+ dprintk(("MASK\n"));
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, cpu_to_le16(0xffff));
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, (PrintfReady | DOORBELL_1 | DOORBELL_2 | DOORBELL_3 | DOORBELL_4));
+ dprintk(("SYNCCMD\n"));
+ /* We can only use a 32 bit address here */
+ sa_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa, &ret);
+}
+
+/**
+ * aac_sa_init - initialize an ARM based AAC card
+ * @dev: device to configure
+ * @devnum: adapter number
+ *
+ * Allocate and set up resources for the ARM based AAC variants. The
+ * device_interface in the commregion will be allocated and linked
+ * to the comm region.
+ */
+
+int aac_sa_init(struct aac_dev *dev, unsigned long devnum)
+{
+ unsigned long start;
+ unsigned long status;
+ int instance;
+ const char *name;
+
+ dev->devnum = devnum;
+
+ dprintk(("PREINST\n"));
+ instance = dev->id;
+ name = dev->name;
+
+ /*
+ * Map in the registers from the adapter.
+ */
+ dprintk(("PREMAP\n"));
+
+ if((dev->regs.sa = (struct sa_registers *)ioremap((unsigned long)dev->scsi_host_ptr->base, 8192))==NULL)
+ {
+ printk(KERN_WARNING "aacraid: unable to map ARM.\n" );
+ return -1;
+ }
+ /*
+ * Check to see if the board failed any self tests.
+ */
+ if (sa_readl(dev, Mailbox7) & SELF_TEST_FAILED) {
+ printk(KERN_WARNING "%s%d: adapter self-test failed.\n", name, instance);
+ return -1;
+ }
+ /*
+ * Check to see if the board panic'd while booting.
+ */
+ if (sa_readl(dev, Mailbox7) & KERNEL_PANIC) {
+ printk(KERN_WARNING "%s%d: adapter kernel panic'd.\n", name, instance);
+ return -1;
+ }
+ start = jiffies;
+ /*
+ * Wait for the adapter to be up and running. Wait up to 3 minutes.
+ */
+ while (!(sa_readl(dev, Mailbox7) & KERNEL_UP_AND_RUNNING)) {
+ if (time_after(jiffies, start+180*HZ)) {
+ status = sa_readl(dev, Mailbox7) >> 16;
+ printk(KERN_WARNING "%s%d: adapter kernel failed to start, init status = %d.\n", name, instance, le32_to_cpu(status));
+ return -1;
+ }
+#if 0
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+#endif
+ mdelay(100);
+ }
+
+ dprintk(("ATIRQ\n"));
+ if (request_irq(dev->scsi_host_ptr->irq, aac_sa_intr, SA_SHIRQ|SA_INTERRUPT, "aacraid", (void *)dev ) < 0) {
+ printk(KERN_WARNING "%s%d: Interrupt unavailable.\n", name, instance);
+ return -1;
+ }
+
+ /*
+ * Fill in the function dispatch table.
+ */
+
+ dev->a_ops.adapter_interrupt = aac_sa_interrupt_adapter;
+ dev->a_ops.adapter_enable_int = aac_sa_enable_interrupt;
+ dev->a_ops.adapter_disable_int = aac_sa_disable_interrupt;
+ dev->a_ops.adapter_notify = aac_sa_notify_adapter;
+ dev->a_ops.adapter_sync_cmd = sa_sync_cmd;
+
+ dprintk(("FUNCDONE\n"));
+
+ if(aac_init_adapter(dev) == NULL)
+ return -1;
+
+ dprintk(("NEWADAPTDONE\n"));
+#if 0
+ /*
+ * Start any kernel threads needed
+ */
+ dev->thread_pid = kernel_thread((int (*)(void *))aac_command_thread, dev, 0);
+#endif
+
+ /*
+ * Tell the adapter that all is configured, and it can start
+ * accepting requests
+ */
+ dprintk(("STARTING\n"));
+ aac_sa_start_adapter(dev);
+ dprintk(("STARTED\n"));
+ return 0;
+}
+
diff --git a/xen/drivers/scsi/constants.c b/xen/drivers/scsi/constants.c
new file mode 100644
index 0000000000..aea16f77d9
--- /dev/null
+++ b/xen/drivers/scsi/constants.c
@@ -0,0 +1,1005 @@
+/*
+ * ASCII values for a number of symbolic constants, printing functions,
+ * etc.
+ * Additions for SCSI 2 and Linux 2.2.x by D. Gilbert (990422)
+ *
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <xeno/config.h>
+#include <xeno/blk.h>
+/*#include <linux/kernel.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#define CONST_COMMAND 0x01
+#define CONST_STATUS 0x02
+#define CONST_SENSE 0x04
+#define CONST_XSENSE 0x08
+#define CONST_CMND 0x10
+#define CONST_MSG 0x20
+#define CONST_HOST 0x40
+#define CONST_DRIVER 0x80
+
+static const char unknown[] = "UNKNOWN";
+
+#ifdef CONFIG_SCSI_CONSTANTS
+#ifdef CONSTANTS
+#undef CONSTANTS
+#endif
+#define CONSTANTS (CONST_COMMAND | CONST_STATUS | CONST_SENSE | CONST_XSENSE \
+ | CONST_CMND | CONST_MSG | CONST_HOST | CONST_DRIVER)
+#endif
+
+#if (CONSTANTS & CONST_COMMAND)
+static const char * group_0_commands[] = {
+/* 00-03 */ "Test Unit Ready", "Rezero Unit", unknown, "Request Sense",
+/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reasssign Blocks",
+/* 08-0d */ "Read (6)", unknown, "Write (6)", "Seek (6)", unknown, unknown,
+/* 0e-12 */ unknown, "Read Reverse", "Write Filemarks", "Space", "Inquiry",
+/* 13-16 */ "Verify", "Recover Buffered Data", "Mode Select", "Reserve",
+/* 17-1b */ "Release", "Copy", "Erase", "Mode Sense", "Start/Stop Unit",
+/* 1c-1d */ "Receive Diagnostic", "Send Diagnostic",
+/* 1e-1f */ "Prevent/Allow Medium Removal", unknown,
+};
+
+
+static const char *group_1_commands[] = {
+/* 20-22 */ unknown, unknown, unknown,
+/* 23-28 */ unknown, "Define window parameters", "Read Capacity",
+ unknown, unknown, "Read (10)",
+/* 29-2d */ "Read Generation", "Write (10)", "Seek (10)", "Erase",
+ "Read updated block",
+/* 2e-31 */ "Write Verify","Verify", "Search High", "Search Equal",
+/* 32-34 */ "Search Low", "Set Limits", "Prefetch or Read Position",
+/* 35-37 */ "Synchronize Cache","Lock/Unlock Cache", "Read Defect Data",
+/* 38-3c */ "Medium Scan", "Compare", "Copy Verify", "Write Buffer",
+ "Read Buffer",
+/* 3d-3f */ "Update Block", "Read Long", "Write Long",
+};
+
+
+static const char *group_2_commands[] = {
+/* 40-41 */ "Change Definition", "Write Same",
+/* 42-48 */ "Read sub-channel", "Read TOC", "Read header",
+ "Play audio (10)", unknown, "Play audio msf",
+ "Play audio track/index",
+/* 49-4f */ "Play track relative (10)", unknown, "Pause/resume",
+ "Log Select", "Log Sense", unknown, unknown,
+/* 50-55 */ unknown, unknown, unknown, unknown, unknown, "Mode Select (10)",
+/* 56-5b */ unknown, unknown, unknown, unknown, "Mode Sense (10)", unknown,
+/* 5c-5f */ unknown, unknown, unknown, unknown,
+};
+
+
+/* The following are 16 byte commands in group 4 */
+static const char *group_4_commands[] = {
+/* 80-84 */ unknown, unknown, unknown, unknown, unknown,
+/* 85-89 */ "Memory Export In (16)", unknown, unknown, unknown,
+ "Memory Export Out (16)",
+/* 8a-8f */ unknown, unknown, unknown, unknown, unknown, unknown,
+/* 90-94 */ unknown, unknown, unknown, unknown, unknown,
+/* 95-99 */ unknown, unknown, unknown, unknown, unknown,
+/* 9a-9f */ unknown, unknown, unknown, unknown, unknown, unknown,
+};
+
+
+/* The following are 12 byte commands in group 5 */
+static const char *group_5_commands[] = {
+/* a0-a5 */ unknown, unknown, unknown, unknown, unknown,
+ "Move medium/play audio(12)",
+/* a6-a9 */ "Exchange medium", unknown, "Read(12)", "Play track relative(12)",
+/* aa-ae */ "Write(12)", unknown, "Erase(12)", unknown,
+ "Write and verify(12)",
+/* af-b1 */ "Verify(12)", "Search data high(12)", "Search data equal(12)",
+/* b2-b4 */ "Search data low(12)", "Set limits(12)", unknown,
+/* b5-b6 */ "Request volume element address", "Send volume tag",
+/* b7-b9 */ "Read defect data(12)", "Read element status", unknown,
+/* ba-bf */ unknown, unknown, unknown, unknown, unknown, unknown,
+};
+
+
+
+#define group(opcode) (((opcode) >> 5) & 7)
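+
+/*
+ * Worked example: opcode 0x28 ("Read (10)") falls in group 1, since
+ * 0x28 >> 5 == 1, and names entry 0x28 & 0x1f == 8 of group_1_commands.
+ */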
+
+#define RESERVED_GROUP 0
+#define VENDOR_GROUP 1
+
+static const char **commands[] = {
+ group_0_commands, group_1_commands, group_2_commands,
+ (const char **) RESERVED_GROUP, group_4_commands,
+ group_5_commands, (const char **) VENDOR_GROUP,
+ (const char **) VENDOR_GROUP
+};
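+
+/*
+ * RESERVED_GROUP and VENDOR_GROUP are small-integer sentinels cast to
+ * pointers; print_opcode() switches on the table pointer's value to
+ * detect them before indexing into a real command table.
+ */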
+
+static const char reserved[] = "RESERVED";
+static const char vendor[] = "VENDOR SPECIFIC";
+
+static void print_opcode(int opcode) {
+ const char **table = commands[ group(opcode) ];
+ switch ((unsigned long) table) {
+ case RESERVED_GROUP:
+ printk("%s(0x%02x) ", reserved, opcode);
+ break;
+ case VENDOR_GROUP:
+ printk("%s(0x%02x) ", vendor, opcode);
+ break;
+ default:
+ if (table[opcode & 0x1f] != unknown)
+ printk("%s ",table[opcode & 0x1f]);
+ else
+ printk("%s(0x%02x) ", unknown, opcode);
+ break;
+ }
+}
+#else /* CONST & CONST_COMMAND */
+static void print_opcode(int opcode) {
+ printk("0x%02x ", opcode);
+}
+#endif
+
+void print_command (unsigned char *command) {
+ int i,s;
+ print_opcode(command[0]);
+ for ( i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
+ printk("%02x ", command[i]);
+ printk("\n");
+}
+
+#if (CONSTANTS & CONST_STATUS)
+static const char * statuses[] = {
+/* 0-4 */ "Good", "Check Condition", "Condition Met", unknown, "Busy",
+/* 5-9 */ unknown, unknown, unknown, "Intermediate", unknown,
+/* a-c */ "Intermediate-Condition Met", unknown, "Reservation Conflict",
+/* d-10 */ unknown, unknown, unknown, unknown,
+/* 11-14 */ "Command Terminated", unknown, unknown, "Queue Full",
+/* 15-1a */ unknown, unknown, unknown, unknown, unknown, unknown,
+/* 1b-1f */ unknown, unknown, unknown, unknown, unknown,
+};
+#endif
+
+void print_status (int status) {
+ status = (status >> 1) & 0x1f;
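+ /*
+ * SCSI status codes occupy bits 1-5 of the status byte, so e.g.
+ * CHECK CONDITION (0x02) indexes statuses[1].
+ */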
+#if (CONSTANTS & CONST_STATUS)
+ printk("%s ",statuses[status]);
+#else
+ printk("0x%0x ", status);
+#endif
+}
+
+#if (CONSTANTS & CONST_XSENSE)
+#define D 0x0001 /* DIRECT ACCESS DEVICE (disk) */
+#define T 0x0002 /* SEQUENTIAL ACCESS DEVICE (tape) */
+#define L 0x0004 /* PRINTER DEVICE */
+#define P 0x0008 /* PROCESSOR DEVICE */
+#define W 0x0010 /* WRITE ONCE READ MULTIPLE DEVICE */
+#define R 0x0020 /* READ ONLY (CD-ROM) DEVICE */
+#define S 0x0040 /* SCANNER DEVICE */
+#define O 0x0080 /* OPTICAL MEMORY DEVICE */
+#define M 0x0100 /* MEDIA CHANGER DEVICE */
+#define C 0x0200 /* COMMUNICATION DEVICE */
+#define A 0x0400 /* ARRAY STORAGE */
+#define E 0x0800 /* ENCLOSURE SERVICES DEVICE */
+#define B 0x1000 /* SIMPLIFIED DIRECT ACCESS DEVICE */
+#define K 0x2000 /* OPTICAL CARD READER/WRITER DEVICE */
+
+struct error_info{
+ unsigned char code1, code2;
+ unsigned short int devices;
+ const char * text;
+};
+
+struct error_info2{
+ unsigned char code1, code2_min, code2_max;
+ unsigned short int devices;
+ const char * text;
+};
+
+static struct error_info2 additional2[] =
+{
+ {0x40,0x00,0x7f,D,"Ram failure (%x)"},
+ {0x40,0x80,0xff,D|T|L|P|W|R|S|O|M|C,"Diagnostic failure on component (%x)"},
+ {0x41,0x00,0xff,D,"Data path failure (%x)"},
+ {0x42,0x00,0xff,D,"Power-on or self-test failure (%x)"},
+ {0, 0, 0, 0, NULL}
+};
+
+static struct error_info additional[] =
+{
+ {0x00,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"No additional sense information"},
+ {0x00,0x01,T,"Filemark detected"},
+ {0x00,0x02,T|S,"End-of-partition/medium detected"},
+ {0x00,0x03,T,"Setmark detected"},
+ {0x00,0x04,T|S,"Beginning-of-partition/medium detected"},
+ {0x00,0x05,T|L|S,"End-of-data detected"},
+ {0x00,0x06,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"I/O process terminated"},
+ {0x00,0x11,R,"Audio play operation in progress"},
+ {0x00,0x12,R,"Audio play operation paused"},
+ {0x00,0x13,R,"Audio play operation successfully completed"},
+ {0x00,0x14,R,"Audio play operation stopped due to error"},
+ {0x00,0x15,R,"No current audio status to return"},
+ {0x00,0x16,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Operation in progress"},
+ {0x00,0x17,D|T|L|W|R|S|O|M|A|E|B|K,"Cleaning requested"},
+ {0x01,0x00,D|W|O|B|K,"No index/sector signal"},
+ {0x02,0x00,D|W|R|O|M|B|K,"No seek complete"},
+ {0x03,0x00,D|T|L|W|S|O|B|K,"Peripheral device write fault"},
+ {0x03,0x01,T,"No write current"},
+ {0x03,0x02,T,"Excessive write errors"},
+ {0x04,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,cause not reportable"},
+ {0x04,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit is in process of becoming ready"},
+ {0x04,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,initializing cmd. required"},
+ {0x04,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,manual intervention required"},
+ {0x04,0x04,D|T|L|R|O|B,"Logical unit not ready,format in progress"},
+ {0x04,0x05,D|T|W|O|M|C|A|B|K,"Logical unit not ready,rebuild in progress"},
+ {0x04,0x06,D|T|W|O|M|C|A|B|K,"Logical unit not ready,recalculation in progress"},
+ {0x04,0x07,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,operation in progress"},
+ {0x04,0x08,R,"Logical unit not ready,long write in progress"},
+ {0x04,0x09,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,self-test in progress"},
+ {0x05,0x00,D|T|L|W|R|S|O|M|C|A|E|B|K,"Logical unit does not respond to selection"},
+ {0x06,0x00,D|W|R|O|M|B|K,"No reference position found"},
+ {0x07,0x00,D|T|L|W|R|S|O|M|B|K,"Multiple peripheral devices selected"},
+ {0x08,0x00,D|T|L|W|R|S|O|M|C|A|E|B|K,"Logical unit communication failure"},
+ {0x08,0x01,D|T|L|W|R|S|O|M|C|A|E|B|K,"Logical unit communication time-out"},
+ {0x08,0x02,D|T|L|W|R|S|O|M|C|A|E|B|K,"Logical unit communication parity error"},
+ {0x08,0x03,D|T|R|O|M|B|K,"Logical unit communication CRC error (Ultra-DMA/32)"},
+ {0x08,0x04,D|T|L|P|W|R|S|O|C|K,"Unreachable copy target"},
+ {0x09,0x00,D|T|W|R|O|B,"Track following error"},
+ {0x09,0x01,W|R|O|K,"Tracking servo failure"},
+ {0x09,0x02,W|R|O|K,"Focus servo failure"},
+ {0x09,0x03,W|R|O,"Spindle servo failure"},
+ {0x09,0x04,D|T|W|R|O|B,"Head select fault"},
+ {0x0A,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Error log overflow"},
+ {0x0B,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Warning"},
+ {0x0B,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Warning - specified temperature exceeded"},
+ {0x0B,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Warning - enclosure degraded"},
+ {0x0C,0x00,T|R|S,"Write error"},
+ {0x0C,0x01,K,"Write error - recovered with auto reallocation"},
+ {0x0C,0x02,D|W|O|B|K,"Write error - auto reallocation failed"},
+ {0x0C,0x03,D|W|O|B|K,"Write error - recommend reassignment"},
+ {0x0C,0x04,D|T|W|O|B,"Compression check miscompare error"},
+ {0x0C,0x05,D|T|W|O|B,"Data expansion occurred during compression"},
+ {0x0C,0x06,D|T|W|O|B,"Block not compressible"},
+ {0x0C,0x07,R,"Write error - recovery needed"},
+ {0x0C,0x08,R,"Write error - recovery failed"},
+ {0x0C,0x09,R,"Write error - loss of streaming"},
+ {0x0C,0x0A,R,"Write error - padding blocks added"},
+ {0x10,0x00,D|W|O|B|K,"Id CRC or ECC error"},
+ {0x11,0x00,D|T|W|R|S|O|B|K,"Unrecovered read error"},
+ {0x11,0x01,D|T|W|R|S|O|B|K,"Read retries exhausted"},
+ {0x11,0x02,D|T|W|R|S|O|B|K,"Error too long to correct"},
+ {0x11,0x03,D|T|W|S|O|B|K,"Multiple read errors"},
+ {0x11,0x04,D|W|O|B|K,"Unrecovered read error - auto reallocate failed"},
+ {0x11,0x05,W|R|O|B,"L-EC uncorrectable error"},
+ {0x11,0x06,W|R|O|B,"CIRC unrecovered error"},
+ {0x11,0x07,W|O|B,"Data re-synchronization error"},
+ {0x11,0x08,T,"Incomplete block read"},
+ {0x11,0x09,T,"No gap found"},
+ {0x11,0x0A,D|T|O|B|K,"Miscorrected error"},
+ {0x11,0x0B,D|W|O|B|K,"Unrecovered read error - recommend reassignment"},
+ {0x11,0x0C,D|W|O|B|K,"Unrecovered read error - recommend rewrite the data"},
+ {0x11,0x0D,D|T|W|R|O|B,"De-compression CRC error"},
+ {0x11,0x0E,D|T|W|R|O|B,"Cannot decompress using declared algorithm"},
+ {0x11,0x0F,R,"Error reading UPC/EAN number"},
+ {0x11,0x10,R,"Error reading ISRC number"},
+ {0x11,0x11,R,"Read error - loss of streaming"},
+ {0x12,0x00,D|W|O|B|K,"Address mark not found for id field"},
+ {0x13,0x00,D|W|O|B|K,"Address mark not found for data field"},
+ {0x14,0x00,D|T|L|W|R|S|O|B|K,"Recorded entity not found"},
+ {0x14,0x01,D|T|W|R|O|B|K,"Record not found"},
+ {0x14,0x02,T,"Filemark or setmark not found"},
+ {0x14,0x03,T,"End-of-data not found"},
+ {0x14,0x04,T,"Block sequence error"},
+ {0x14,0x05,D|T|W|O|B|K,"Record not found - recommend reassignment"},
+ {0x14,0x06,D|T|W|O|B|K,"Record not found - data auto-reallocated"},
+ {0x15,0x00,D|T|L|W|R|S|O|M|B|K,"Random positioning error"},
+ {0x15,0x01,D|T|L|W|R|S|O|M|B|K,"Mechanical positioning error"},
+ {0x15,0x02,D|T|W|R|O|B|K,"Positioning error detected by read of medium"},
+ {0x16,0x00,D|W|O|B|K,"Data synchronization mark error"},
+ {0x16,0x01,D|W|O|B|K,"Data sync error - data rewritten"},
+ {0x16,0x02,D|W|O|B|K,"Data sync error - recommend rewrite"},
+ {0x16,0x03,D|W|O|B|K,"Data sync error - data auto-reallocated"},
+ {0x16,0x04,D|W|O|B|K,"Data sync error - recommend reassignment"},
+ {0x17,0x00,D|T|W|R|S|O|B|K,"Recovered data with no error correction applied"},
+ {0x17,0x01,D|T|W|R|S|O|B|K,"Recovered data with retries"},
+ {0x17,0x02,D|T|W|R|O|B|K,"Recovered data with positive head offset"},
+ {0x17,0x03,D|T|W|R|O|B|K,"Recovered data with negative head offset"},
+ {0x17,0x04,W|R|O|B,"Recovered data with retries and/or circ applied"},
+ {0x17,0x05,D|W|R|O|B|K,"Recovered data using previous sector id"},
+ {0x17,0x06,D|W|O|B|K,"Recovered data without ecc - data auto-reallocated"},
+ {0x17,0x07,D|W|R|O|B|K,"Recovered data without ecc - recommend reassignment"},
+ {0x17,0x08,D|W|R|O|B|K,"Recovered data without ecc - recommend rewrite"},
+ {0x17,0x09,D|W|R|O|B|K,"Recovered data without ecc - data rewritten"},
+ {0x18,0x00,D|T|W|R|O|B|K,"Recovered data with error correction applied"},
+ {0x18,0x01,D|W|R|O|B|K,"Recovered data with error corr. & retries applied"},
+ {0x18,0x02,D|W|R|O|B|K,"Recovered data - data auto-reallocated"},
+ {0x18,0x03,R,"Recovered data with CIRC"},
+ {0x18,0x04,R,"Recovered data with L-EC"},
+ {0x18,0x05,D|W|R|O|B|K,"Recovered data - recommend reassignment"},
+ {0x18,0x06,D|W|R|O|B|K,"Recovered data - recommend rewrite"},
+ {0x18,0x07,D|W|O|B|K,"Recovered data with ecc - data rewritten"},
+ {0x19,0x00,D|O|K,"Defect list error"},
+ {0x19,0x01,D|O|K,"Defect list not available"},
+ {0x19,0x02,D|O|K,"Defect list error in primary list"},
+ {0x19,0x03,D|O|K,"Defect list error in grown list"},
+ {0x1A,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Parameter list length error"},
+ {0x1B,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Synchronous data transfer error"},
+ {0x1C,0x00,D|O|B|K,"Defect list not found"},
+ {0x1C,0x01,D|O|B|K,"Primary defect list not found"},
+ {0x1C,0x02,D|O|B|K,"Grown defect list not found"},
+ {0x1D,0x00,D|T|W|R|O|B|K,"Miscompare during verify operation"},
+ {0x1E,0x00,D|W|O|B|K,"Recovered id with ecc correction"},
+ {0x1F,0x00,D|O|K,"Partial defect list transfer"},
+ {0x20,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid command operation code"},
+ {0x21,0x00,D|T|W|R|O|M|B|K,"Logical block address out of range"},
+ {0x21,0x01,D|T|W|R|O|M|B|K,"Invalid element address"},
+ {0x22,0x00,D,"Illegal function (use 20 00,24 00,or 26 00)"},
+ {0x24,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid field in cdb"},
+ {0x24,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"CDB decryption error"},
+ {0x25,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not supported"},
+ {0x26,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid field in parameter list"},
+ {0x26,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Parameter not supported"},
+ {0x26,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Parameter value invalid"},
+ {0x26,0x03,D|T|L|P|W|R|S|O|M|C|A|E|K,"Threshold parameters not supported"},
+ {0x26,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid release of persistent reservation"},
+ {0x26,0x05,D|T|L|P|W|R|S|O|M|C|A|B|K,"Data decryption error"},
+ {0x26,0x06,D|T|L|P|W|R|S|O|C|K,"Too many target descriptors"},
+ {0x26,0x07,D|T|L|P|W|R|S|O|C|K,"Unsupported target descriptor type code"},
+ {0x26,0x08,D|T|L|P|W|R|S|O|C|K,"Too many segment descriptors"},
+ {0x26,0x09,D|T|L|P|W|R|S|O|C|K,"Unsupported segment descriptor type code"},
+ {0x26,0x0A,D|T|L|P|W|R|S|O|C|K,"Unexpected inexact segment"},
+ {0x26,0x0B,D|T|L|P|W|R|S|O|C|K,"Inline data length exceeded"},
+ {0x26,0x0C,D|T|L|P|W|R|S|O|C|K,"Invalid operation for copy source or destination"},
+ {0x26,0x0D,D|T|L|P|W|R|S|O|C|K,"Copy segment granularity violation"},
+ {0x27,0x00,D|T|W|R|O|B|K,"Write protected"},
+ {0x27,0x01,D|T|W|R|O|B|K,"Hardware write protected"},
+ {0x27,0x02,D|T|W|R|O|B|K,"Logical unit software write protected"},
+ {0x27,0x03,T|R,"Associated write protect"},
+ {0x27,0x04,T|R,"Persistent write protect"},
+ {0x27,0x05,T|R,"Permanent write protect"},
+ {0x28,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Not ready to ready change,medium may have changed"},
+ {0x28,0x01,D|T|W|R|O|M|B,"Import or export element accessed"},
+ {0x29,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Power on,reset,or bus device reset occurred"},
+ {0x29,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Power on occurred"},
+ {0x29,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Scsi bus reset occurred"},
+ {0x29,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Bus device reset function occurred"},
+ {0x29,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Device internal reset"},
+ {0x29,0x05,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Transceiver mode changed to single-ended"},
+ {0x29,0x06,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Transceiver mode changed to lvd"},
+ {0x2A,0x00,D|T|L|W|R|S|O|M|C|A|E|B|K,"Parameters changed"},
+ {0x2A,0x01,D|T|L|W|R|S|O|M|C|A|E|B|K,"Mode parameters changed"},
+ {0x2A,0x02,D|T|L|W|R|S|O|M|C|A|E|K,"Log parameters changed"},
+ {0x2A,0x03,D|T|L|P|W|R|S|O|M|C|A|E|K,"Reservations preempted"},
+ {0x2A,0x04,D|T|L|P|W|R|S|O|M|C|A|E,"Reservations released"},
+ {0x2A,0x05,D|T|L|P|W|R|S|O|M|C|A|E,"Registrations preempted"},
+ {0x2B,0x00,D|T|L|P|W|R|S|O|C|K,"Copy cannot execute since host cannot disconnect"},
+ {0x2C,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Command sequence error"},
+ {0x2C,0x01,S,"Too many windows specified"},
+ {0x2C,0x02,S,"Invalid combination of windows specified"},
+ {0x2C,0x03,R,"Current program area is not empty"},
+ {0x2C,0x04,R,"Current program area is empty"},
+ {0x2C,0x05,B,"Illegal power condition request"},
+ {0x2D,0x00,T,"Overwrite error on update in place"},
+ {0x2F,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Commands cleared by another initiator"},
+ {0x30,0x00,D|T|W|R|O|M|B|K,"Incompatible medium installed"},
+ {0x30,0x01,D|T|W|R|O|B|K,"Cannot read medium - unknown format"},
+ {0x30,0x02,D|T|W|R|O|B|K,"Cannot read medium - incompatible format"},
+ {0x30,0x03,D|T|R|K,"Cleaning cartridge installed"},
+ {0x30,0x04,D|T|W|R|O|B|K,"Cannot write medium - unknown format"},
+ {0x30,0x05,D|T|W|R|O|B|K,"Cannot write medium - incompatible format"},
+ {0x30,0x06,D|T|W|R|O|B,"Cannot format medium - incompatible medium"},
+ {0x30,0x07,D|T|L|W|R|S|O|M|A|E|B|K,"Cleaning failure"},
+ {0x30,0x08,R,"Cannot write - application code mismatch"},
+ {0x30,0x09,R,"Current session not fixated for append"},
+ {0x31,0x00,D|T|W|R|O|B|K,"Medium format corrupted"},
+ {0x31,0x01,D|L|R|O|B,"Format command failed"},
+ {0x32,0x00,D|W|O|B|K,"No defect spare location available"},
+ {0x32,0x01,D|W|O|B|K,"Defect list update failure"},
+ {0x33,0x00,T,"Tape length error"},
+ {0x34,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure failure"},
+ {0x35,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure services failure"},
+ {0x35,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Unsupported enclosure function"},
+ {0x35,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure services unavailable"},
+ {0x35,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure services transfer failure"},
+ {0x35,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure services transfer refused"},
+ {0x36,0x00,L,"Ribbon,ink,or toner failure"},
+ {0x37,0x00,D|T|L|W|R|S|O|M|C|A|E|B|K,"Rounded parameter"},
+ {0x38,0x00,B,"Event status notification"},
+ {0x38,0x02,B,"Esn - power management class event"},
+ {0x38,0x04,B,"Esn - media class event"},
+ {0x38,0x06,B,"Esn - device busy class event"},
+ {0x39,0x00,D|T|L|W|R|S|O|M|C|A|E|K,"Saving parameters not supported"},
+ {0x3A,0x00,D|T|L|W|R|S|O|M|B|K,"Medium not present"},
+ {0x3A,0x01,D|T|W|R|O|M|B|K,"Medium not present - tray closed"},
+ {0x3A,0x02,D|T|W|R|O|M|B|K,"Medium not present - tray open"},
+ {0x3A,0x03,D|T|W|R|O|M|B,"Medium not present - loadable"},
+ {0x3A,0x04,D|T|W|R|O|M|B,"Medium not present - medium auxiliary memory accessible"},
+ {0x3B,0x00,T|L,"Sequential positioning error"},
+ {0x3B,0x01,T,"Tape position error at beginning-of-medium"},
+ {0x3B,0x02,T,"Tape position error at end-of-medium"},
+ {0x3B,0x03,L,"Tape or electronic vertical forms unit not ready"},
+ {0x3B,0x04,L,"Slew failure"},
+ {0x3B,0x05,L,"Paper jam"},
+ {0x3B,0x06,L,"Failed to sense top-of-form"},
+ {0x3B,0x07,L,"Failed to sense bottom-of-form"},
+ {0x3B,0x08,T,"Reposition error"},
+ {0x3B,0x09,S,"Read past end of medium"},
+ {0x3B,0x0A,S,"Read past beginning of medium"},
+ {0x3B,0x0B,S,"Position past end of medium"},
+ {0x3B,0x0C,T|S,"Position past beginning of medium"},
+ {0x3B,0x0D,D|T|W|R|O|M|B|K,"Medium destination element full"},
+ {0x3B,0x0E,D|T|W|R|O|M|B|K,"Medium source element empty"},
+ {0x3B,0x0F,R,"End of medium reached"},
+ {0x3B,0x11,D|T|W|R|O|M|B|K,"Medium magazine not accessible"},
+ {0x3B,0x12,D|T|W|R|O|M|B|K,"Medium magazine removed"},
+ {0x3B,0x13,D|T|W|R|O|M|B|K,"Medium magazine inserted"},
+ {0x3B,0x14,D|T|W|R|O|M|B|K,"Medium magazine locked"},
+ {0x3B,0x15,D|T|W|R|O|M|B|K,"Medium magazine unlocked"},
+ {0x3B,0x16,R,"Mechanical positioning or changer error"},
+ {0x3D,0x00,D|T|L|P|W|R|S|O|M|C|A|E|K,"Invalid bits in identify message"},
+ {0x3E,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit has not self-configured yet"},
+ {0x3E,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit failure"},
+ {0x3E,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Timeout on logical unit"},
+ {0x3E,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit failed self-test"},
+ {0x3E,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit unable to update self-test log"},
+ {0x3F,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Target operating conditions have changed"},
+ {0x3F,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Microcode has been changed"},
+ {0x3F,0x02,D|T|L|P|W|R|S|O|M|C|B|K,"Changed operating definition"},
+ {0x3F,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Inquiry data has changed"},
+ {0x3F,0x04,D|T|W|R|O|M|C|A|E|B|K,"Component device attached"},
+ {0x3F,0x05,D|T|W|R|O|M|C|A|E|B|K,"Device identifier changed"},
+ {0x3F,0x06,D|T|W|R|O|M|C|A|E|B,"Redundancy group created or modified"},
+ {0x3F,0x07,D|T|W|R|O|M|C|A|E|B,"Redundancy group deleted"},
+ {0x3F,0x08,D|T|W|R|O|M|C|A|E|B,"Spare created or modified"},
+ {0x3F,0x09,D|T|W|R|O|M|C|A|E|B,"Spare deleted"},
+ {0x3F,0x0A,D|T|W|R|O|M|C|A|E|B|K,"Volume set created or modified"},
+ {0x3F,0x0B,D|T|W|R|O|M|C|A|E|B|K,"Volume set deleted"},
+ {0x3F,0x0C,D|T|W|R|O|M|C|A|E|B|K,"Volume set deassigned"},
+ {0x3F,0x0D,D|T|W|R|O|M|C|A|E|B|K,"Volume set reassigned"},
+ {0x3F,0x0E,D|T|L|P|W|R|S|O|M|C|A|E,"Reported luns data has changed"},
+ {0x3F,0x10,D|T|W|R|O|M|B,"Medium loadable"},
+ {0x3F,0x11,D|T|W|R|O|M|B,"Medium auxiliary memory accessible"},
+ {0x40,0x00,D,"Ram failure (should use 40 nn)"},
+ /*
+ * FIXME(eric) - need a way to represent wildcards here.
+ */
+ {0x40,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Diagnostic failure on component nn (80h-ffh)"},
+ {0x41,0x00,D,"Data path failure (should use 40 nn)"},
+ {0x42,0x00,D,"Power-on or self-test failure (should use 40 nn)"},
+ {0x43,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Message error"},
+ {0x44,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Internal target failure"},
+ {0x45,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Select or reselect failure"},
+ {0x46,0x00,D|T|L|P|W|R|S|O|M|C|B|K,"Unsuccessful soft reset"},
+ {0x47,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Scsi parity error"},
+ {0x47,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Data phase CRC error detected"},
+ {0x47,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Scsi parity error detected during st data phase"},
+ {0x47,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Information unit CRC error detected"},
+ {0x47,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Asynchronous information protection error detected"},
+ {0x48,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Initiator detected error message received"},
+ {0x49,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid message error"},
+ {0x4A,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Command phase error"},
+ {0x4B,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Data phase error"},
+ {0x4C,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit failed self-configuration"},
+ /*
+ * FIXME(eric) - need a way to represent wildcards here.
+ */
+ {0x4D,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Tagged overlapped commands (nn = queue tag)"},
+ {0x4E,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Overlapped commands attempted"},
+ {0x50,0x00,T,"Write append error"},
+ {0x50,0x01,T,"Write append position error"},
+ {0x50,0x02,T,"Position error related to timing"},
+ {0x51,0x00,T|R|O,"Erase failure"},
+ {0x52,0x00,T,"Cartridge fault"},
+ {0x53,0x00,D|T|L|W|R|S|O|M|B|K,"Media load or eject failed"},
+ {0x53,0x01,T,"Unload tape failure"},
+ {0x53,0x02,D|T|W|R|O|M|B|K,"Medium removal prevented"},
+ {0x54,0x00,P,"Scsi to host system interface failure"},
+ {0x55,0x00,P,"System resource failure"},
+ {0x55,0x01,D|O|B|K,"System buffer full"},
+ {0x55,0x02,D|T|L|P|W|R|S|O|M|A|E|K,"Insufficient reservation resources"},
+ {0x55,0x03,D|T|L|P|W|R|S|O|M|C|A|E,"Insufficient resources"},
+ {0x55,0x04,D|T|L|P|W|R|S|O|M|A|E,"Insufficient registration resources"},
+ {0x57,0x00,R,"Unable to recover table-of-contents"},
+ {0x58,0x00,O,"Generation does not exist"},
+ {0x59,0x00,O,"Updated block read"},
+ {0x5A,0x00,D|T|L|P|W|R|S|O|M|B|K,"Operator request or state change input"},
+ {0x5A,0x01,D|T|W|R|O|M|B|K,"Operator medium removal request"},
+ {0x5A,0x02,D|T|W|R|O|A|B|K,"Operator selected write protect"},
+ {0x5A,0x03,D|T|W|R|O|A|B|K,"Operator selected write permit"},
+ {0x5B,0x00,D|T|L|P|W|R|S|O|M|K,"Log exception"},
+ {0x5B,0x01,D|T|L|P|W|R|S|O|M|K,"Threshold condition met"},
+ {0x5B,0x02,D|T|L|P|W|R|S|O|M|K,"Log counter at maximum"},
+ {0x5B,0x03,D|T|L|P|W|R|S|O|M|K,"Log list codes exhausted"},
+ {0x5C,0x00,D|O,"Rpl status change"},
+ {0x5C,0x01,D|O,"Spindles synchronized"},
+ {0x5C,0x02,D|O,"Spindles not synchronized"},
+ {0x5D,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Failure prediction threshold exceeded"},
+ {0x5D,0x01,R|B,"Media failure prediction threshold exceeded"},
+ {0x5D,0x02,R,"Logical unit failure prediction threshold exceeded"},
+ {0x5D,0x10,D|B,"Hardware impending failure general hard drive failure"},
+ {0x5D,0x11,D|B,"Hardware impending failure drive error rate too high"},
+ {0x5D,0x12,D|B,"Hardware impending failure data error rate too high"},
+ {0x5D,0x13,D|B,"Hardware impending failure seek error rate too high"},
+ {0x5D,0x14,D|B,"Hardware impending failure too many block reassigns"},
+ {0x5D,0x15,D|B,"Hardware impending failure access times too high"},
+ {0x5D,0x16,D|B,"Hardware impending failure start unit times too high"},
+ {0x5D,0x17,D|B,"Hardware impending failure channel parametrics"},
+ {0x5D,0x18,D|B,"Hardware impending failure controller detected"},
+ {0x5D,0x19,D|B,"Hardware impending failure throughput performance"},
+ {0x5D,0x1A,D|B,"Hardware impending failure seek time performance"},
+ {0x5D,0x1B,D|B,"Hardware impending failure spin-up retry count"},
+ {0x5D,0x1C,D|B,"Hardware impending failure drive calibration retry count"},
+ {0x5D,0x20,D|B,"Controller impending failure general hard drive failure"},
+ {0x5D,0x21,D|B,"Controller impending failure drive error rate too high"},
+ {0x5D,0x22,D|B,"Controller impending failure data error rate too high"},
+ {0x5D,0x23,D|B,"Controller impending failure seek error rate too high"},
+ {0x5D,0x24,D|B,"Controller impending failure too many block reassigns"},
+ {0x5D,0x25,D|B,"Controller impending failure access times too high"},
+ {0x5D,0x26,D|B,"Controller impending failure start unit times too high"},
+ {0x5D,0x27,D|B,"Controller impending failure channel parametrics"},
+ {0x5D,0x28,D|B,"Controller impending failure controller detected"},
+ {0x5D,0x29,D|B,"Controller impending failure throughput performance"},
+ {0x5D,0x2A,D|B,"Controller impending failure seek time performance"},
+ {0x5D,0x2B,D|B,"Controller impending failure spin-up retry count"},
+ {0x5D,0x2C,D|B,"Controller impending failure drive calibration retry count"},
+ {0x5D,0x30,D|B,"Data channel impending failure general hard drive failure"},
+ {0x5D,0x31,D|B,"Data channel impending failure drive error rate too high"},
+ {0x5D,0x32,D|B,"Data channel impending failure data error rate too high"},
+ {0x5D,0x33,D|B,"Data channel impending failure seek error rate too high"},
+ {0x5D,0x34,D|B,"Data channel impending failure too many block reassigns"},
+ {0x5D,0x35,D|B,"Data channel impending failure access times too high"},
+ {0x5D,0x36,D|B,"Data channel impending failure start unit times too high"},
+ {0x5D,0x37,D|B,"Data channel impending failure channel parametrics"},
+ {0x5D,0x38,D|B,"Data channel impending failure controller detected"},
+ {0x5D,0x39,D|B,"Data channel impending failure throughput performance"},
+ {0x5D,0x3A,D|B,"Data channel impending failure seek time performance"},
+ {0x5D,0x3B,D|B,"Data channel impending failure spin-up retry count"},
+ {0x5D,0x3C,D|B,"Data channel impending failure drive calibration retry count"},
+ {0x5D,0x40,D|B,"Servo impending failure general hard drive failure"},
+ {0x5D,0x41,D|B,"Servo impending failure drive error rate too high"},
+ {0x5D,0x42,D|B,"Servo impending failure data error rate too high"},
+ {0x5D,0x43,D|B,"Servo impending failure seek error rate too high"},
+ {0x5D,0x44,D|B,"Servo impending failure too many block reassigns"},
+ {0x5D,0x45,D|B,"Servo impending failure access times too high"},
+ {0x5D,0x46,D|B,"Servo impending failure start unit times too high"},
+ {0x5D,0x47,D|B,"Servo impending failure channel parametrics"},
+ {0x5D,0x48,D|B,"Servo impending failure controller detected"},
+ {0x5D,0x49,D|B,"Servo impending failure throughput performance"},
+ {0x5D,0x4A,D|B,"Servo impending failure seek time performance"},
+ {0x5D,0x4B,D|B,"Servo impending failure spin-up retry count"},
+ {0x5D,0x4C,D|B,"Servo impending failure drive calibration retry count"},
+ {0x5D,0x50,D|B,"Spindle impending failure general hard drive failure"},
+ {0x5D,0x51,D|B,"Spindle impending failure drive error rate too high"},
+ {0x5D,0x52,D|B,"Spindle impending failure data error rate too high"},
+ {0x5D,0x53,D|B,"Spindle impending failure seek error rate too high"},
+ {0x5D,0x54,D|B,"Spindle impending failure too many block reassigns"},
+ {0x5D,0x55,D|B,"Spindle impending failure access times too high"},
+ {0x5D,0x56,D|B,"Spindle impending failure start unit times too high"},
+ {0x5D,0x57,D|B,"Spindle impending failure channel parametrics"},
+ {0x5D,0x58,D|B,"Spindle impending failure controller detected"},
+ {0x5D,0x59,D|B,"Spindle impending failure throughput performance"},
+ {0x5D,0x5A,D|B,"Spindle impending failure seek time performance"},
+ {0x5D,0x5B,D|B,"Spindle impending failure spin-up retry count"},
+ {0x5D,0x5C,D|B,"Spindle impending failure drive calibration retry count"},
+ {0x5D,0x60,D|B,"Firmware impending failure general hard drive failure"},
+ {0x5D,0x61,D|B,"Firmware impending failure drive error rate too high"},
+ {0x5D,0x62,D|B,"Firmware impending failure data error rate too high"},
+ {0x5D,0x63,D|B,"Firmware impending failure seek error rate too high"},
+ {0x5D,0x64,D|B,"Firmware impending failure too many block reassigns"},
+ {0x5D,0x65,D|B,"Firmware impending failure access times too high"},
+ {0x5D,0x66,D|B,"Firmware impending failure start unit times too high"},
+ {0x5D,0x67,D|B,"Firmware impending failure channel parametrics"},
+ {0x5D,0x68,D|B,"Firmware impending failure controller detected"},
+ {0x5D,0x69,D|B,"Firmware impending failure throughput performance"},
+ {0x5D,0x6A,D|B,"Firmware impending failure seek time performance"},
+ {0x5D,0x6B,D|B,"Firmware impending failure spin-up retry count"},
+ {0x5D,0x6C,D|B,"Firmware impending failure drive calibration retry count"},
+ {0x5D,0xFF,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Failure prediction threshold exceeded (false)"},
+ {0x5E,0x00,D|T|L|P|W|R|S|O|C|A|K,"Low power condition on"},
+ {0x5E,0x01,D|T|L|P|W|R|S|O|C|A|K,"Idle condition activated by timer"},
+ {0x5E,0x02,D|T|L|P|W|R|S|O|C|A|K,"Standby condition activated by timer"},
+ {0x5E,0x03,D|T|L|P|W|R|S|O|C|A|K,"Idle condition activated by command"},
+ {0x5E,0x04,D|T|L|P|W|R|S|O|C|A|K,"Standby condition activated by command"},
+ {0x5E,0x41,B,"Power state change to active"},
+ {0x5E,0x42,B,"Power state change to idle"},
+ {0x5E,0x43,B,"Power state change to standby"},
+ {0x5E,0x45,B,"Power state change to sleep"},
+ {0x5E,0x47,B|K,"Power state change to device control"},
+ {0x60,0x00,S,"Lamp failure"},
+ {0x61,0x00,S,"Video acquisition error"},
+ {0x61,0x01,S,"Unable to acquire video"},
+ {0x61,0x02,S,"Out of focus"},
+ {0x62,0x00,S,"Scan head positioning error"},
+ {0x63,0x00,R,"End of user area encountered on this track"},
+ {0x63,0x01,R,"Packet does not fit in available space"},
+ {0x64,0x00,R,"Illegal mode for this track"},
+ {0x64,0x01,R,"Invalid packet size"},
+ {0x65,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Voltage fault"},
+ {0x66,0x00,S,"Automatic document feeder cover up"},
+ {0x66,0x01,S,"Automatic document feeder lift up"},
+ {0x66,0x02,S,"Document jam in automatic document feeder"},
+ {0x66,0x03,S,"Document miss feed automatic in document feeder"},
+ {0x67,0x00,A,"Configuration failure"},
+ {0x67,0x01,A,"Configuration of incapable logical units failed"},
+ {0x67,0x02,A,"Add logical unit failed"},
+ {0x67,0x03,A,"Modification of logical unit failed"},
+ {0x67,0x04,A,"Exchange of logical unit failed"},
+ {0x67,0x05,A,"Remove of logical unit failed"},
+ {0x67,0x06,A,"Attachment of logical unit failed"},
+ {0x67,0x07,A,"Creation of logical unit failed"},
+ {0x67,0x08,A,"Assign failure occurred"},
+ {0x67,0x09,A,"Multiply assigned logical unit"},
+ {0x68,0x00,A,"Logical unit not configured"},
+ {0x69,0x00,A,"Data loss on logical unit"},
+ {0x69,0x01,A,"Multiple logical unit failures"},
+ {0x69,0x02,A,"Parity/data mismatch"},
+ {0x6A,0x00,A,"Informational,refer to log"},
+ {0x6B,0x00,A,"State change has occurred"},
+ {0x6B,0x01,A,"Redundancy level got better"},
+ {0x6B,0x02,A,"Redundancy level got worse"},
+ {0x6C,0x00,A,"Rebuild failure occurred"},
+ {0x6D,0x00,A,"Recalculate failure occurred"},
+ {0x6E,0x00,A,"Command to logical unit failed"},
+ {0x6F,0x00,R,"Copy protection key exchange failure - authentication failure"},
+ {0x6F,0x01,R,"Copy protection key exchange failure - key not present"},
+ {0x6F,0x02,R,"Copy protection key exchange failure - key not established"},
+ {0x6F,0x03,R,"Read of scrambled sector without authentication"},
+ {0x6F,0x04,R,"Media region code is mismatched to logical unit region"},
+ {0x6F,0x05,R,"Drive region must be permanent/region reset count error"},
+ /*
+ * FIXME(eric) - need a way to represent wildcards here.
+ */
+ {0x70,0x00,T,"Decompression exception short algorithm id of nn"},
+ {0x71,0x00,T,"Decompression exception long algorithm id"},
+ {0x72,0x00,R,"Session fixation error"},
+ {0x72,0x01,R,"Session fixation error writing lead-in"},
+ {0x72,0x02,R,"Session fixation error writing lead-out"},
+ {0x72,0x03,R,"Session fixation error - incomplete track in session"},
+ {0x72,0x04,R,"Empty or partially written reserved track"},
+ {0x72,0x05,R,"No more track reservations allowed"},
+ {0x73,0x00,R,"Cd control error"},
+ {0x73,0x01,R,"Power calibration area almost full"},
+ {0x73,0x02,R,"Power calibration area is full"},
+ {0x73,0x03,R,"Power calibration area error"},
+ {0x73,0x04,R,"Program memory area update failure"},
+ {0x73,0x05,R,"Program memory area is full"},
+ {0x73,0x06,R,"RMA/PMA is full"},
+ {0, 0, 0, NULL}
+};
+#endif
+
+#if (CONSTANTS & CONST_SENSE)
+static const char *snstext[] = {
+ "None", /* There is no sense information */
+ "Recovered Error", /* The last command completed successfully
+ but used error correction */
+ "Not Ready", /* The addressed target is not ready */
+ "Medium Error", /* Data error detected on the medium */
+ "Hardware Error", /* Controller or device failure */
+ "Illegal Request",
+ "Unit Attention", /* Removable medium was changed, or
+ the target has been reset */
+ "Data Protect", /* Access to the data is blocked */
+ "Blank Check", /* Reached unexpected written or unwritten
+ region of the medium */
+ "Key=9", /* Vendor specific */
+ "Copy Aborted", /* COPY or COMPARE was aborted */
+ "Aborted Command", /* The target aborted the command */
+ "Equal", /* A SEARCH DATA command found data equal */
+ "Volume Overflow", /* Medium full with still data to be written */
+ "Miscompare", /* Source data and data on the medium
+ do not agree */
+ "Key=15" /* Reserved */
+};
+#endif
+
+/* Print sense information */
+static
+void print_sense_internal(const char * devclass,
+ const unsigned char * sense_buffer,
+ kdev_t dev)
+{
+ int i, s;
+ int sense_class, valid, code, info;
+ const char * error = NULL;
+
+ sense_class = (sense_buffer[0] >> 4) & 0x07;
+ code = sense_buffer[0] & 0xf;
+ valid = sense_buffer[0] & 0x80;
+
+ if (sense_class == 7) { /* extended sense data */
+ s = sense_buffer[7] + 8;
+ if(s > SCSI_SENSE_BUFFERSIZE)
+ s = SCSI_SENSE_BUFFERSIZE;
+
+ info = ((sense_buffer[3] << 24) | (sense_buffer[4] << 16) |
+ (sense_buffer[5] << 8) | sense_buffer[6]);
+ if (info || valid) {
+ printk("Info fld=0x%x", info);
+ if (!valid) /* info data not according to standard */
+ printk(" (nonstd)");
+ printk(", ");
+ }
+ if (sense_buffer[2] & 0x80)
+ printk( "FMK "); /* current command has read a filemark */
+ if (sense_buffer[2] & 0x40)
+ printk( "EOM "); /* end-of-medium condition exists */
+ if (sense_buffer[2] & 0x20)
+ printk( "ILI "); /* incorrect block length requested */
+
+ switch (code) {
+ case 0x0:
+ error = "Current"; /* error concerns current command */
+ break;
+ case 0x1:
+ error = "Deferred"; /* error concerns some earlier command */
+ /* e.g., an earlier write to disk cache succeeded, but
+ now the disk discovers that it cannot write the data */
+ break;
+ default:
+ error = "Invalid";
+ }
+
+ printk("%s ", error);
+
+#if (CONSTANTS & CONST_SENSE)
+ printk( "%s%s: sense key %s\n", devclass,
+ kdevname(dev), snstext[sense_buffer[2] & 0x0f]);
+#else
+ printk("%s%s: sns = %2x %2x\n", devclass,
+ kdevname(dev), sense_buffer[0], sense_buffer[2]);
+#endif
+
+ /* Check to see if additional sense information is available */
+ if(sense_buffer[7] + 7 < 13 ||
+ (sense_buffer[12] == 0 && sense_buffer[13] == 0)) goto done;
+
+#if (CONSTANTS & CONST_XSENSE)
+ for(i=0; additional[i].text; i++)
+ if(additional[i].code1 == sense_buffer[12] &&
+ additional[i].code2 == sense_buffer[13])
+ printk("Additional sense indicates %s\n", additional[i].text);
+
+ for(i=0; additional2[i].text; i++)
+ if(additional2[i].code1 == sense_buffer[12] &&
+ additional2[i].code2_min <= sense_buffer[13] &&
+ additional2[i].code2_max >= sense_buffer[13]) {
+ printk("Additional sense indicates ");
+ printk(additional2[i].text, sense_buffer[13]);
+ printk("\n");
+ }
+#else
+ printk("ASC=%2x ASCQ=%2x\n", sense_buffer[12], sense_buffer[13]);
+#endif
+ } else { /* non-extended sense data */
+
+ /*
+ * Standard says:
+ * sense_buffer[0] & 0200 : address valid
+ * sense_buffer[0] & 0177 : vendor-specific error code
+ * sense_buffer[1] & 0340 : vendor-specific
+ * sense_buffer[1..3] : 21-bit logical block address
+ */
+
+#if (CONSTANTS & CONST_SENSE)
+ if (sense_buffer[0] < 15)
+ printk("%s%s: old sense key %s\n", devclass,
+ kdevname(dev), snstext[sense_buffer[0] & 0x0f]);
+ else
+#endif
+ printk("%s%s: sns = %2x %2x\n", devclass,
+ kdevname(dev), sense_buffer[0], sense_buffer[2]);
+
+ printk("Non-extended sense class %d code 0x%0x\n", sense_class, code);
+ s = 4;
+ }
+
+ done:
+#if !(CONSTANTS & CONST_SENSE)
+ printk("Raw sense data:");
+ for (i = 0; i < s; ++i)
+ printk("0x%02x ", sense_buffer[i]);
+ printk("\n");
+#endif
+ return;
+}
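+
+/*
+ * Illustrative helper (not part of the original file): decoding the
+ * 21-bit logical block address from a non-extended sense buffer, per
+ * the layout noted in the comment above. A sketch only; nothing below
+ * uses it.
+ */
+static inline unsigned int non_extended_sense_lba(const unsigned char *sb)
+{
+    /* low 5 bits of byte 1, then bytes 2 and 3 */
+    return ((sb[1] & 0x1f) << 16) | (sb[2] << 8) | sb[3];
+}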
+
+void print_sense(const char * devclass, Scsi_Cmnd * SCpnt)
+{
+ print_sense_internal(devclass, SCpnt->sense_buffer,
+ SCpnt->request.rq_dev);
+}
+
+void print_req_sense(const char * devclass, Scsi_Request * SRpnt)
+{
+ print_sense_internal(devclass, SRpnt->sr_sense_buffer,
+ SRpnt->sr_request.rq_dev);
+}
+
+#if (CONSTANTS & CONST_MSG)
+static const char *one_byte_msgs[] = {
+/* 0x00 */ "Command Complete", NULL, "Save Pointers",
+/* 0x03 */ "Restore Pointers", "Disconnect", "Initiator Error",
+/* 0x06 */ "Abort", "Message Reject", "Nop", "Message Parity Error",
+/* 0x0a */ "Linked Command Complete", "Linked Command Complete w/flag",
+/* 0x0c */ "Bus device reset", "Abort Tag", "Clear Queue",
+/* 0x0f */ "Initiate Recovery", "Release Recovery"
+};
+
+#define NO_ONE_BYTE_MSGS (sizeof(one_byte_msgs) / sizeof (const char *))
+
+static const char *two_byte_msgs[] = {
+/* 0x20 */ "Simple Queue Tag", "Head of Queue Tag", "Ordered Queue Tag"
+/* 0x23 */ "Ignore Wide Residue"
+};
+
+#define NO_TWO_BYTE_MSGS (sizeof(two_byte_msgs) / sizeof (const char *))
+
+static const char *extended_msgs[] = {
+/* 0x00 */ "Modify Data Pointer", "Synchronous Data Transfer Request",
+/* 0x02 */ "SCSI-I Extended Identify", "Wide Data Transfer Request"
+};
+
+#define NO_EXTENDED_MSGS (sizeof(extended_msgs) / sizeof (const char *))
+#endif /* (CONSTANTS & CONST_MSG) */
+
+int print_msg (const unsigned char *msg) {
+ int len = 0, i;
+ if (msg[0] == EXTENDED_MESSAGE) {
+ len = 3 + msg[1];
+#if (CONSTANTS & CONST_MSG)
+ if (msg[2] < NO_EXTENDED_MSGS)
+ printk ("%s ", extended_msgs[msg[2]]);
+ else
+ printk ("Extended Message, reserved code (0x%02x) ", (int) msg[2]);
+ switch (msg[2]) {
+ case EXTENDED_MODIFY_DATA_POINTER:
+ printk("pointer = %d", (int) (msg[3] << 24) | (msg[4] << 16) |
+ (msg[5] << 8) | msg[6]);
+ break;
+ case EXTENDED_SDTR:
+ printk("period = %d ns, offset = %d", (int) msg[3] * 4, (int)
+ msg[4]);
+ break;
+ case EXTENDED_WDTR:
+ printk("width = 2^%d bytes", msg[3]);
+ break;
+ default:
+ for (i = 2; i < len; ++i)
+ printk("%02x ", msg[i]);
+ }
+#else
+ for (i = 0; i < len; ++i)
+ printk("%02x ", msg[i]);
+#endif
+ /* Identify */
+ } else if (msg[0] & 0x80) {
+#if (CONSTANTS & CONST_MSG)
+ printk("Identify disconnect %sallowed %s %d ",
+ (msg[0] & 0x40) ? "" : "not ",
+ (msg[0] & 0x20) ? "target routine" : "lun",
+ msg[0] & 0x7);
+#else
+ printk("%02x ", msg[0]);
+#endif
+ len = 1;
+ /* Normal One byte */
+ } else if (msg[0] < 0x1f) {
+#if (CONSTANTS & CONST_MSG)
+ if (msg[0] < NO_ONE_BYTE_MSGS)
+ printk(one_byte_msgs[msg[0]]);
+ else
+ printk("reserved (%02x) ", msg[0]);
+#else
+ printk("%02x ", msg[0]);
+#endif
+ len = 1;
+ /* Two byte */
+ } else if (msg[0] <= 0x2f) {
+#if (CONSTANTS & CONST_MSG)
+ if ((msg[0] - 0x20) < NO_TWO_BYTE_MSGS)
+ printk("%s %02x ", two_byte_msgs[msg[0] - 0x20],
+ msg[1]);
+ else
+ printk("reserved two byte (%02x %02x) ",
+ msg[0], msg[1]);
+#else
+ printk("%02x %02x", msg[0], msg[1]);
+#endif
+ len = 2;
+ } else
+#if (CONSTANTS & CONST_MSG)
+ printk(reserved);
+#else
+ printk("%02x ", msg[0]);
+#endif
+ return len;
+}
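+
+/*
+ * Usage sketch (illustrative, not from the original file): printing an
+ * extended SDTR message. The byte values are made up; byte 1 is the
+ * count of bytes that follow it (code, period, offset).
+ */
+#if 0
+static void print_msg_example(void)
+{
+    /* SDTR: period code 0x19 (0x19 * 4 = 100 ns), offset 15 */
+    unsigned char msg[5] = { EXTENDED_MESSAGE, 3, EXTENDED_SDTR, 0x19, 15 };
+    /* prints "Synchronous Data Transfer Request period = 100 ns, offset = 15" */
+    print_msg(msg);
+}
+#endif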
+
+void print_Scsi_Cmnd (Scsi_Cmnd *cmd) {
+ printk("scsi%d : destination target %d, lun %d\n",
+ cmd->host->host_no,
+ cmd->target,
+ cmd->lun);
+ printk(" command = ");
+ print_command (cmd->cmnd);
+}
+
+#if (CONSTANTS & CONST_HOST)
+static const char * hostbyte_table[]={
+"DID_OK", "DID_NO_CONNECT", "DID_BUS_BUSY", "DID_TIME_OUT", "DID_BAD_TARGET",
+"DID_ABORT", "DID_PARITY", "DID_ERROR", "DID_RESET", "DID_BAD_INTR",
+"DID_PASSTHROUGH", "DID_SOFT_ERROR", NULL};
+
+void print_hostbyte(int scsiresult)
+{ static int maxcode=0;
+ int i;
+
+ if(!maxcode) {
+ for(i=0;hostbyte_table[i];i++) ;
+ maxcode=i-1;
+ }
+ printk("Hostbyte=0x%02x",host_byte(scsiresult));
+ if(host_byte(scsiresult)>maxcode) {
+ printk("is invalid ");
+ return;
+ }
+ printk("(%s) ",hostbyte_table[host_byte(scsiresult)]);
+}
+#else
+void print_hostbyte(int scsiresult)
+{ printk("Hostbyte=0x%02x ",host_byte(scsiresult));
+}
+#endif
+
+#if (CONSTANTS & CONST_DRIVER)
+static const char * driverbyte_table[]={
+"DRIVER_OK", "DRIVER_BUSY", "DRIVER_SOFT", "DRIVER_MEDIA", "DRIVER_ERROR",
+"DRIVER_INVALID", "DRIVER_TIMEOUT", "DRIVER_HARD",NULL };
+
+static const char * driversuggest_table[]={"SUGGEST_OK",
+"SUGGEST_RETRY", "SUGGEST_ABORT", "SUGGEST_REMAP", "SUGGEST_DIE",
+unknown,unknown,unknown, "SUGGEST_SENSE",NULL};
+
+
+void print_driverbyte(int scsiresult)
+{ static int driver_max=0,suggest_max=0;
+ int i,dr=driver_byte(scsiresult)&DRIVER_MASK,
+ su=(driver_byte(scsiresult)&SUGGEST_MASK)>>4;
+
+ if(!driver_max) {
+ for(i=0;driverbyte_table[i];i++) ;
+ driver_max=i;
+ for(i=0;driversuggest_table[i];i++) ;
+ suggest_max=i;
+ }
+ printk("Driverbyte=0x%02x",driver_byte(scsiresult));
+ printk("(%s,%s) ",
+ dr<driver_max ? driverbyte_table[dr]:"invalid",
+ su<suggest_max ? driversuggest_table[su]:"invalid");
+}
+#else
+void print_driverbyte(int scsiresult)
+{ printk("Driverbyte=0x%02x ",driver_byte(scsiresult));
+}
+#endif
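+
+/*
+ * Worked illustration (not in the original source), assuming the usual
+ * DRIVER_MASK = 0x0f and SUGGEST_MASK = 0xf0 from scsi.h: a result with
+ * driver byte 0x24 splits into dr = 0x24 & 0x0f = 4 ("DRIVER_ERROR")
+ * and su = (0x24 & 0xf0) >> 4 = 2 ("SUGGEST_ABORT"), so
+ * print_driverbyte() emits "Driverbyte=0x24(DRIVER_ERROR,SUGGEST_ABORT) ".
+ */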
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/constants.h b/xen/drivers/scsi/constants.h
new file mode 100644
index 0000000000..e10527ea5e
--- /dev/null
+++ b/xen/drivers/scsi/constants.h
@@ -0,0 +1,6 @@
+#ifndef _CONSTANTS_H
+#define _CONSTANTS_H
+extern int print_msg(const unsigned char *);
+extern void print_status(int);
+extern void print_Scsi_Cmnd (Scsi_Cmnd *);
+#endif /* def _CONSTANTS_H */
diff --git a/xen/drivers/scsi/hosts.c b/xen/drivers/scsi/hosts.c
new file mode 100644
index 0000000000..ea613aaa47
--- /dev/null
+++ b/xen/drivers/scsi/hosts.c
@@ -0,0 +1,316 @@
+/*
+ * hosts.c Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995 Eric Youngdale
+ *
+ * mid to lowlevel SCSI driver interface
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli
+ * Added QLOGIC QLA1280 SCSI controller kernel host support.
+ * August 4, 1999 Fred Lewis, Intel DuPont
+ *
+ * Updated to reflect the new initialization scheme for the higher
+ * level of scsi drivers (sd/sr/st)
+ * September 17, 2000 Torben Mathiasen <tmm@image.dk>
+ */
+
+
+/*
+ * This file contains the medium level SCSI
+ * host interface initialization, as well as the scsi_hosts array of SCSI
+ * hosts currently present in the system.
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+#include <xeno/blk.h>
+/* #include <xeno/kernel.h> */
+/* #include <xeno/string.h> */
+/* #include <xeno/mm.h> */
+/* #include <xeno/proc_fs.h> */
+#include <xeno/init.h>
+
+#define __KERNEL_SYSCALLS__
+
+/* #include <xeno/unistd.h> */
+
+#include "scsi.h"
+#include "hosts.h"
+
+/*
+static const char RCSid[] = "$Header: /vger/u4/cvs/linux/drivers/scsi/hosts.c,v 1.20 1996/12/12 19:18:32 davem Exp $";
+*/
+
+/*
+ * The scsi host entries should be in the order you wish the
+ * cards to be detected. A driver may appear more than once IFF
+ * it can deal with being detected (and therefore initialized)
+ * with more than one simultaneous host number, can handle being
+ * reentrant, etc.
+ *
+ * They may appear in any order, as each SCSI host is told which host
+ * number it is during detection.
+ */
+
+/* This is a placeholder for controllers that are not configured into
+ * the system - we do this to ensure that the controller numbering is
+ * always consistent, no matter how the kernel is configured. */
+
+#define NO_CONTROLLER {NULL, NULL, NULL, NULL, NULL, NULL, NULL, \
+ NULL, NULL, 0, 0, 0, 0, 0, 0}
+
+/*
+ * When configure is run, we don't want to link to any object code. Since
+ * the macro for each host will contain function pointers, we cannot
+ * use it and instead must use a "blank" that does no such
+ * idiocy.
+ */
+
+Scsi_Host_Template * scsi_hosts;
+
+
+/*
+ * Our semaphores and timeout counters, whose sizes depend on
+ * MAX_SCSI_HOSTS.
+ */
+
+Scsi_Host_Name * scsi_host_no_list;
+struct Scsi_Host * scsi_hostlist;
+struct Scsi_Device_Template * scsi_devicelist;
+
+int max_scsi_hosts;
+int next_scsi_host;
+
+void
+scsi_unregister(struct Scsi_Host * sh){
+ struct Scsi_Host * shpnt;
+ Scsi_Host_Name *shn;
+
+ if(scsi_hostlist == sh)
+ scsi_hostlist = sh->next;
+ else {
+ shpnt = scsi_hostlist;
+ while(shpnt->next != sh) shpnt = shpnt->next;
+ shpnt->next = shpnt->next->next;
+ }
+
+ /*
+ * We have to unregister the host from the scsi_host_no_list as well.
+ * Decide by host_no rather than by name, because most host drivers can
+ * handle more than one adapter of the same kind (or family).
+ */
+ for ( shn=scsi_host_no_list; shn && (sh->host_no != shn->host_no);
+ shn=shn->next);
+ if (shn) shn->host_registered = 0;
+ /* else {} : This should not happen, we should panic here... */
+
+ /* If we are removing the last host registered, it is safe to reuse
+ * its host number (this avoids "holes" at boot time) (DB)
+ * It is also safe to reuse the numbers directly below it which have
+ * been released earlier (to avoid some holes in the numbering).
+ */
+ if(sh->host_no == max_scsi_hosts - 1) {
+ while(--max_scsi_hosts >= next_scsi_host) {
+ shpnt = scsi_hostlist;
+ while(shpnt && shpnt->host_no != max_scsi_hosts - 1)
+ shpnt = shpnt->next;
+ if(shpnt)
+ break;
+ }
+ }
+ next_scsi_host--;
+ kfree((char *) sh);
+}
+
+/* We call this when we come across a new host adapter. We only do this
+ * once we are 100% sure that we want to use this host adapter - it is a
+ * pain to reverse this, so we try to avoid it
+ */
+
+struct Scsi_Host * scsi_register(Scsi_Host_Template * tpnt, int j){
+ struct Scsi_Host * retval, *shpnt, *o_shp;
+ Scsi_Host_Name *shn, *shn2;
+ int flag_new = 1;
+ const char * hname;
+ size_t hname_len;
+ retval = (struct Scsi_Host *)kmalloc(sizeof(struct Scsi_Host) + j,
+ (tpnt->unchecked_isa_dma && j ?
+ GFP_DMA : 0) | GFP_ATOMIC);
+ if(retval == NULL)
+ {
+ printk("scsi: out of memory in scsi_register.\n");
+ return NULL;
+ }
+
+ memset(retval, 0, sizeof(struct Scsi_Host) + j);
+
+ /* trying to find a reserved entry (host_no) */
+ hname = (tpnt->proc_name) ? tpnt->proc_name : "";
+ hname_len = strlen(hname);
+ for (shn = scsi_host_no_list;shn;shn = shn->next) {
+ if (!(shn->host_registered) &&
+ (hname_len > 0) && (0 == strncmp(hname, shn->name, hname_len))) {
+ flag_new = 0;
+ retval->host_no = shn->host_no;
+ shn->host_registered = 1;
+ shn->loaded_as_module = 1;
+ break;
+ }
+ }
+ atomic_set(&retval->host_active,0);
+ retval->host_busy = 0;
+ retval->host_failed = 0;
+ if(j > 0xffff) panic("Too many extra bytes requested\n");
+ retval->extra_bytes = j;
+ retval->loaded_as_module = 1;
+ if (flag_new) {
+ shn = (Scsi_Host_Name *) kmalloc(sizeof(Scsi_Host_Name), GFP_ATOMIC);
+ if (!shn) {
+ kfree(retval);
+ printk(KERN_ERR "scsi: out of memory(2) in scsi_register.\n");
+ return NULL;
+ }
+ shn->name = kmalloc(hname_len + 1, GFP_ATOMIC);
+ if (!shn->name) { kfree(shn); kfree(retval); return NULL; }
+ if (hname_len > 0)
+ strncpy(shn->name, hname, hname_len);
+ shn->name[hname_len] = 0;
+ shn->host_no = max_scsi_hosts++;
+ shn->host_registered = 1;
+ shn->loaded_as_module = 1;
+ shn->next = NULL;
+ if (scsi_host_no_list) {
+ for (shn2 = scsi_host_no_list;shn2->next;shn2 = shn2->next)
+ ;
+ shn2->next = shn;
+ }
+ else
+ scsi_host_no_list = shn;
+ retval->host_no = shn->host_no;
+ }
+ next_scsi_host++;
+ retval->host_queue = NULL;
+#if 0
+ init_waitqueue_head(&retval->host_wait);
+#endif
+ retval->resetting = 0;
+ retval->last_reset = 0;
+ retval->irq = 0;
+ retval->dma_channel = 0xff;
+
+ /* These three are default values which can be overridden */
+ retval->max_channel = 0;
+ retval->max_id = 8;
+ retval->max_lun = 8;
+
+ /*
+ * All drivers right now should be able to handle 12 byte commands.
+ * Every so often there are requests for 16 byte commands, but individual
+ * low-level drivers need to certify that they actually do something
+ * sensible with such commands.
+ */
+ retval->max_cmd_len = 12;
+
+ retval->unique_id = 0;
+ retval->io_port = 0;
+ retval->hostt = tpnt;
+ retval->next = NULL;
+ retval->in_recovery = 0;
+ retval->ehandler = NULL; /* Initial value until the thing starts up. */
+ retval->eh_notify = NULL; /* Who we notify when we exit. */
+
+
+ retval->host_blocked = FALSE;
+ retval->host_self_blocked = FALSE;
+
+#ifdef DEBUG
+ printk("Register %x %x: %d\n", (int)retval, (int)retval->hostt, j);
+#endif
+
+ /* The next six are the default values which can be overridden
+ * if need be */
+ retval->this_id = tpnt->this_id;
+ retval->can_queue = tpnt->can_queue;
+ retval->sg_tablesize = tpnt->sg_tablesize;
+ retval->cmd_per_lun = tpnt->cmd_per_lun;
+ retval->unchecked_isa_dma = tpnt->unchecked_isa_dma;
+ retval->use_clustering = tpnt->use_clustering;
+
+ retval->select_queue_depths = tpnt->select_queue_depths;
+ retval->max_sectors = tpnt->max_sectors;
+
+ if(!scsi_hostlist)
+ scsi_hostlist = retval;
+ else {
+ shpnt = scsi_hostlist;
+ if (retval->host_no < shpnt->host_no) {
+ retval->next = shpnt;
+ wmb(); /* want all to see these writes in this order */
+ scsi_hostlist = retval;
+ }
+ else {
+ for (o_shp = shpnt, shpnt = shpnt->next; shpnt;
+ o_shp = shpnt, shpnt = shpnt->next) {
+ if (retval->host_no < shpnt->host_no) {
+ retval->next = shpnt;
+ wmb();
+ o_shp->next = retval;
+ break;
+ }
+ }
+ if (! shpnt)
+ o_shp->next = retval;
+ }
+ }
+
+ return retval;
+}
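+
+/*
+ * Usage sketch (illustrative, not part of the original file): the shape
+ * of a low-level driver's detect() routine calling scsi_register(). The
+ * probe logic, resource values and "example" names are all hypothetical.
+ */
+#if 0
+static int example_detect(Scsi_Host_Template *tpnt)
+{
+    struct Scsi_Host *sh;
+
+    /* second argument = extra per-host bytes, kept in sh->hostdata[] */
+    sh = scsi_register(tpnt, 64);
+    if (sh == NULL)
+        return 0;               /* no adapter found */
+    sh->io_port = 0x330;        /* made-up resources */
+    sh->n_io_port = 4;
+    sh->irq = 11;
+    return 1;                   /* number of adapters registered */
+}
+#endif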
+
+int
+scsi_register_device(struct Scsi_Device_Template * sdpnt)
+{
+ if(sdpnt->next) panic("Device already registered");
+ sdpnt->next = scsi_devicelist;
+ scsi_devicelist = sdpnt;
+ return 0;
+}
+
+void
+scsi_deregister_device(struct Scsi_Device_Template * tpnt)
+{
+ struct Scsi_Device_Template *spnt;
+ struct Scsi_Device_Template *prev_spnt;
+
+ spnt = scsi_devicelist;
+ prev_spnt = NULL;
+ while (spnt != tpnt) {
+ prev_spnt = spnt;
+ spnt = spnt->next;
+ }
+ if (prev_spnt == NULL)
+ scsi_devicelist = tpnt->next;
+ else
+ prev_spnt->next = spnt->next;
+}
+
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/hosts.h b/xen/drivers/scsi/hosts.h
new file mode 100644
index 0000000000..34d3592e0e
--- /dev/null
+++ b/xen/drivers/scsi/hosts.h
@@ -0,0 +1,591 @@
+/*
+ * hosts.h Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995, 1998, 1999 Eric Youngdale
+ *
+ * mid to low-level SCSI driver interface header
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Modified by Eric Youngdale eric@andante.org to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ *
+ * Further modified by Eric Youngdale to support multiple host adapters
+ * of the same type.
+ *
+ * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli
+ */
+
+#ifndef _HOSTS_H
+#define _HOSTS_H
+
+/*
+ $Header: /vger/u4/cvs/linux/drivers/scsi/hosts.h,v 1.6 1997/01/19 23:07:13 davem Exp $
+*/
+
+#include <xeno/config.h>
+/*#include <xeno/proc_fs.h>*/
+#include <xeno/pci.h>
+
+/* It is senseless to set SG_ALL any higher than this - the performance
+ * does not get any better, and it wastes memory
+ */
+#define SG_NONE 0
+#define SG_ALL 0xff
+
+#define DISABLE_CLUSTERING 0
+#define ENABLE_CLUSTERING 1
+
+/* The various choices mean:
+ * NONE: Self evident. Host adapter is not capable of scatter-gather.
+ * ALL: Means that the host adapter module can do scatter-gather,
+ * and that there is no limit to the size of the table to which
+ * we scatter/gather data.
+ * Anything else: Indicates the maximum number of chains that can be
+ * used in one scatter-gather request.
+ */
+
+/*
+ * The Scsi_Host_Template type has all that is needed to interface with a SCSI
+ * host in a device independent manner. There is one entry for each different
+ * type of host adapter that is supported on the system. (A filled-in
+ * sketch follows the typedef below.)
+ */
+
+typedef struct scsi_disk Disk;
+
+typedef struct SHT
+{
+
+ /* Used with loadable modules so we can construct a linked list. */
+ struct SHT * next;
+
+ /* Used with loadable modules so that we know when it is safe to unload */
+ struct module * module;
+
+#ifdef CONFIG_PROC_FS
+ /* The pointer to the /proc/scsi directory entry */
+ struct proc_dir_entry *proc_dir;
+
+ /* proc-fs info function.
+ * Can be used to export driver statistics and other infos to the world
+ * outside the kernel ie. userspace and it also provides an interface
+ * to feed the driver with information. Check eata_dma_proc.c for reference
+ */
+ int (*proc_info)(char *, char **, off_t, int, int, int);
+#endif
+
+ /*
+ * The name pointer is a pointer to the name of the SCSI
+ * device detected.
+ */
+ const char *name;
+
+ /*
+ * The detect function shall return nonzero on detection,
+ * indicating the number of host adapters of this particular
+ * type that were found. It should also
+ * initialize all data necessary for this particular
+ * SCSI driver. It is passed the host number, so this host
+ * knows where the first entry is in the scsi_hosts[] array.
+ *
+ * Note that the detect routine MUST not call any of the mid level
+ * functions to queue commands because things are not guaranteed
+ * to be set up yet. The detect routine can send commands to
+ * the host adapter as long as the program control will not be
+ * passed to scsi.c in the processing of the command. Note
+ * especially that scsi_malloc/scsi_free must not be called.
+ */
+ int (* detect)(struct SHT *);
+
+ int (*revoke)(Scsi_Device *);
+
+ /* Used with loadable modules to unload the host structures. Note:
+ * there is a default action built into the modules code which may
+ * be sufficient for most host adapters. Thus you may not have to supply
+ * this at all.
+ */
+ int (*release)(struct Scsi_Host *);
+
+ /*
+ * The info function will return whatever useful
+ * information the developer sees fit. If not provided, then
+ * the name field will be used instead.
+ */
+ const char *(* info)(struct Scsi_Host *);
+
+ /*
+ * ioctl interface
+ */
+ int (*ioctl)(Scsi_Device *dev, int cmd, void *arg);
+
+ /*
+ * The command function takes a target, a command (this is a SCSI
+ * command formatted as per the SCSI spec, nothing strange), a
+ * data buffer pointer, and data buffer length pointer. The return
+ * is a status int, bit fielded as follows :
+ * Byte What
+ * 0 SCSI status code
+ * 1 SCSI 1 byte message
+ * 2 host error return.
+ * 3 mid level error return
+ */
+ int (* command)(Scsi_Cmnd *);
+
+ /*
+ * The QueueCommand function works in a similar manner
+ * to the command function. It takes an additional parameter,
+ * void (* done)(Scsi_Cmnd *), which is invoked with the
+ * completed command when the command finishes.
+ *
+ * The done() function must only be called after QueueCommand()
+ * has returned.
+ */
+ int (* queuecommand)(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+
+ /*
+ * This is an error handling strategy routine. You don't need to
+ * define one of these if you don't want to - there is a default
+ * routine that is present that should work in most cases. For those
+ * driver authors that have the inclination and ability to write their
+ * own strategy routine, this is where it is specified. Note - the
+ * strategy routine is *ALWAYS* run in the context of the kernel eh
+ * thread. Thus you are guaranteed to *NOT* be in an interrupt handler
+ * when you execute this, and you are also guaranteed to *NOT* have any
+ * other commands being queued while you are in the strategy routine.
+ * When you return from this function, operations return to normal.
+ *
+ * See scsi_error.c scsi_unjam_host for additional comments about what
+ * this function should and should not be attempting to do.
+ */
+ int (*eh_strategy_handler)(struct Scsi_Host *);
+ int (*eh_abort_handler)(Scsi_Cmnd *);
+ int (*eh_device_reset_handler)(Scsi_Cmnd *);
+ int (*eh_bus_reset_handler)(Scsi_Cmnd *);
+ int (*eh_host_reset_handler)(Scsi_Cmnd *);
+
+ /*
+ * Since the mid level driver handles time outs, etc, we want to
+ * be able to abort the current command. Abort returns 0 if the
+ * abort was successful. The field SCpnt->abort_reason
+ * can be filled in with the appropriate reason why we wanted
+ * the abort in the first place, and this will be used
+ * in the mid-level code instead of the host_byte().
+ * If non-zero, the code passed to it
+ * will be used as the return code, otherwise
+ * DID_ABORT should be returned.
+ *
+ * Note that the scsi driver should "clean up" after itself,
+ * resetting the bus, etc. if necessary.
+ *
+ * NOTE - this interface is deprecated, and will go away. Use
+ * the eh_ routines instead.
+ */
+ int (* abort)(Scsi_Cmnd *);
+
+ /*
+ * The reset function will reset the SCSI bus. Any executing
+ * commands should fail with a DID_RESET in the host byte.
+ * The Scsi_Cmnd is passed so that the reset routine can figure
+ * out which host adapter should be reset, and also which command
+ * within the command block was responsible for the reset in
+ * the first place. Some hosts do not implement a reset function,
+ * and these hosts must call scsi_request_sense(SCpnt) to keep
+ * the command alive.
+ *
+ * NOTE - this interface is deprecated, and will go away. Use
+ * the eh_ routines instead.
+ */
+ int (* reset)(Scsi_Cmnd *, unsigned int);
+
+ /*
+ * This function is used to select synchronous communications,
+ * which will result in a higher data throughput. Not implemented
+ * yet.
+ */
+ int (* slave_attach)(int, int);
+
+ /*
+ * This function determines the bios parameters for a given
+ * hard disk. These tend to be numbers that are made up by
+ * the host adapter. Parameters:
+ * size, device number, list (heads, sectors, cylinders)
+ */
+ int (* bios_param)(Disk *, kdev_t, int []);
+
+
+ /*
+ * Used to set the queue depth for a specific device.
+ */
+ void (*select_queue_depths)(struct Scsi_Host *, Scsi_Device *);
+
+ /*
+ * This determines if we will use a non-interrupt driven
+ * or an interrupt driven scheme. It is set to the maximum number
+ * of simultaneous commands a given host adapter will accept.
+ */
+ int can_queue;
+
+ /*
+ * In many instances, especially where disconnect / reconnect are
+ * supported, our host also has an ID on the SCSI bus. If this is
+ * the case, then it must be reserved. Please set this_id to -1 if
+ * your setup is in single initiator mode, and the host lacks an
+ * ID.
+ */
+ int this_id;
+
+ /*
+ * This determines the degree to which the host adapter is capable
+ * of scatter-gather.
+ */
+ short unsigned int sg_tablesize;
+
+ /*
+ * if the host adapter has limitations beside segment count
+ */
+ short unsigned int max_sectors;
+
+ /*
+ * True if this host adapter can make good use of linked commands.
+ * This will allow more than one command to be queued to a given
+ * unit on a given host. Set this to the maximum number of command
+ * blocks to be provided for each device. Set this to 1 for one
+ * command block per lun, 2 for two, etc. Do not set this to 0.
+ * You should make sure that the host adapter will do the right thing
+ * before you try setting this above 1.
+ */
+ short cmd_per_lun;
+
+ /*
+ * present contains counter indicating how many boards of this
+ * type were found when we did the scan.
+ */
+ unsigned char present;
+
+ /*
+ * true if this host adapter uses unchecked DMA onto an ISA bus.
+ */
+ unsigned unchecked_isa_dma:1;
+
+ /*
+ * true if this host adapter can make good use of clustering.
+ * I originally thought that if the tablesize was large that it
+ * was a waste of CPU cycles to prepare a cluster list, but
+ * it works out that the Buslogic is faster if you use a smaller
+ * number of segments (i.e. use clustering). I guess it is
+ * inefficient.
+ */
+ unsigned use_clustering:1;
+
+ /*
+ * True if this driver uses the new error handling code. This flag is
+ * really only temporary until all of the other drivers get converted
+ * to use the new error handling code.
+ */
+ unsigned use_new_eh_code:1;
+
+ /*
+ * True for emulated SCSI host adapters (e.g. ATAPI)
+ */
+ unsigned emulated:1;
+
+ /*
+ * Name of proc directory
+ */
+ char *proc_name;
+
+} Scsi_Host_Template;
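+
+/*
+ * Illustrative sketch (not part of the original header): how a low-level
+ * driver might fill in this template. Every name and value here is
+ * hypothetical; real drivers supply their own entry points.
+ */
+#if 0
+static Scsi_Host_Template example_sht = {
+    name: "Example SCSI Adapter",
+    proc_name: "example",
+    detect: example_detect,             /* hypothetical */
+    release: example_release,           /* hypothetical */
+    queuecommand: example_queuecommand, /* hypothetical */
+    can_queue: 16,                      /* up to 16 outstanding commands */
+    this_id: 7,                         /* the adapter's own SCSI ID */
+    sg_tablesize: SG_ALL,
+    cmd_per_lun: 2,
+    use_clustering: ENABLE_CLUSTERING,
+    use_new_eh_code: 1,
+};
+#endif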
+
+/*
+ * The scsi_hosts array is the array containing the data for all
+ * possible <supported> scsi hosts. This is similar to the
+ * Scsi_Host_Template, except that we have one entry for each
+ * actual physical host adapter on the system, stored as a linked
+ * list. Note that if there are 2 aha1542 boards, then there will
+ * be two Scsi_Host entries, but only 1 Scsi_Host_Template entry.
+ */
+
+struct Scsi_Host
+{
+/* private: */
+ /*
+ * This information is private to the scsi mid-layer. Wrapping it in a
+ * struct private is a way of marking it in a sort of C++ type of way.
+ */
+ struct Scsi_Host * next;
+ Scsi_Device * host_queue;
+
+
+ struct task_struct * ehandler; /* Error recovery thread. */
+ struct semaphore * eh_wait; /* The error recovery thread waits on
+ this. */
+ struct semaphore * eh_notify; /* wait for eh to begin */
+ struct semaphore * eh_action; /* Wait for specific actions on the
+ host. */
+ unsigned int eh_active:1; /* Indicates the eh thread is awake and active if
+ this is true. */
+#if 0
+ wait_queue_head_t host_wait;
+#endif
+ Scsi_Host_Template * hostt;
+ atomic_t host_active; /* commands checked out */
+ volatile unsigned short host_busy; /* commands actually active on low-level */
+ volatile unsigned short host_failed; /* commands that failed. */
+
+/* public: */
+ unsigned short extra_bytes;
+ unsigned short host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */
+ int resetting; /* if set, it means that last_reset is a valid value */
+ unsigned long last_reset;
+
+
+ /*
+ * These three parameters can be used to allow for wide scsi,
+ * and for host adapters that support multiple busses
+ * The first two should be set to 1 more than the actual max id
+ * or lun (i.e. 8 for normal systems).
+ */
+ unsigned int max_id;
+ unsigned int max_lun;
+ unsigned int max_channel;
+
+ /* These parameters should be set by the detect routine */
+ unsigned long base;
+ unsigned long io_port;
+ unsigned char n_io_port;
+ unsigned char dma_channel;
+ unsigned int irq;
+
+ /*
+ * This is a unique identifier that must be assigned so that we
+ * have some way of identifying each detected host adapter properly
+ * and uniquely. For hosts that do not support more than one card
+ * in the system at one time, this does not need to be set. It is
+ * initialized to 0 in scsi_register.
+ */
+ unsigned int unique_id;
+
+ /*
+ * The rest can be copied from the template, or specifically
+ * initialized, as required.
+ */
+
+ /*
+ * The maximum length of SCSI commands that this host can accept.
+ * Probably 12 for most host adapters, but could be 16 for others.
+ * For drivers that don't set this field, a value of 12 is
+ * assumed. I am leaving this as a number rather than a bit
+ * because you never know what subsequent SCSI standards might do
+ * (i.e. could there be a 20 byte or a 24-byte command a few years
+ * down the road?).
+ */
+ unsigned char max_cmd_len;
+
+ int this_id;
+ int can_queue;
+ short cmd_per_lun;
+ short unsigned int sg_tablesize;
+ short unsigned int max_sectors;
+
+ unsigned in_recovery:1;
+ unsigned unchecked_isa_dma:1;
+ unsigned use_clustering:1;
+ /*
+ * True if this host was loaded as a loadable module
+ */
+ unsigned loaded_as_module:1;
+
+ /*
+ * Host has rejected a command because it was busy.
+ */
+ unsigned host_blocked:1;
+
+ /*
+ * Host has requested that no further requests come through for the
+ * time being.
+ */
+ unsigned host_self_blocked:1;
+
+ /*
+ * Host uses correct SCSI ordering not PC ordering. The bit is
+ * set for the minority of drivers whose authors actually read the spec ;)
+ */
+ unsigned reverse_ordering:1;
+
+ /*
+ * Indicates that one or more devices on this host were starved, and
+ * that we need to feed them when the device becomes less busy.
+ */
+ unsigned some_device_starved:1;
+
+ void (*select_queue_depths)(struct Scsi_Host *, Scsi_Device *);
+
+ /*
+ * For SCSI hosts which are PCI devices, set pci_dev so that
+ * we can do BIOS EDD 3.0 mappings
+ */
+ struct pci_dev *pci_dev;
+
+ /*
+ * We should ensure that this is aligned, both for better performance
+ * and also because some compilers (m68k) don't automatically force
+ * alignment to a long boundary.
+ */
+ unsigned long hostdata[0] /* Used for storage of host specific stuff */
+ __attribute__ ((aligned (sizeof(unsigned long))));
+};
+
+/*
+ * These two functions are used to allocate and free a pseudo device
+ * which will connect to the host adapter itself rather than any
+ * physical device. You must deallocate when you are done with the
+ * thing. This physical pseudo-device isn't real and won't be available
+ * from any high-level drivers.
+ */
+extern void scsi_free_host_dev(Scsi_Device * SDpnt);
+extern Scsi_Device * scsi_get_host_dev(struct Scsi_Host * SHpnt);
+
+extern void scsi_unblock_requests(struct Scsi_Host * SHpnt);
+extern void scsi_block_requests(struct Scsi_Host * SHpnt);
+extern void scsi_report_bus_reset(struct Scsi_Host * SHpnt, int channel);
+
+typedef struct SHN
+ {
+ struct SHN * next;
+ char * name;
+ unsigned short host_no;
+ unsigned short host_registered;
+ unsigned loaded_as_module;
+ } Scsi_Host_Name;
+
+extern Scsi_Host_Name * scsi_host_no_list;
+extern struct Scsi_Host * scsi_hostlist;
+extern struct Scsi_Device_Template * scsi_devicelist;
+
+extern Scsi_Host_Template * scsi_hosts;
+
+extern void build_proc_dir_entries(Scsi_Host_Template *);
+
+/*
+ * scsi_init initializes the scsi hosts.
+ */
+
+extern int next_scsi_host;
+
+unsigned int scsi_init(void);
+extern struct Scsi_Host * scsi_register(Scsi_Host_Template *, int j);
+extern void scsi_unregister(struct Scsi_Host * i);
+
+extern void scsi_register_blocked_host(struct Scsi_Host * SHpnt);
+extern void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt);
+
+static inline void scsi_set_pci_device(struct Scsi_Host *SHpnt,
+ struct pci_dev *pdev)
+{
+ SHpnt->pci_dev = pdev;
+}
+
+
+/*
+ * Prototypes for functions/data in scsi_scan.c
+ */
+extern void scan_scsis(struct Scsi_Host *shpnt,
+ uint hardcoded,
+ uint hchannel,
+ uint hid,
+ uint hlun);
+
+extern void scsi_mark_host_reset(struct Scsi_Host *Host);
+
+#define BLANK_HOST {"", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+struct Scsi_Device_Template
+{
+ struct Scsi_Device_Template * next;
+ const char * name;
+ const char * tag;
+ struct module * module; /* Used for loadable modules */
+ unsigned char scsi_type;
+ unsigned int major;
+ unsigned int min_major; /* Minimum major in range. */
+ unsigned int max_major; /* Maximum major in range. */
+ unsigned int nr_dev; /* Number currently attached */
+ unsigned int dev_noticed; /* Number of devices detected. */
+ unsigned int dev_max; /* Current size of arrays */
+ unsigned blk:1; /* 0 if character device */
+ int (*detect)(Scsi_Device *); /* Returns 1 if we can attach this device */
+ int (*init)(void); /* Sizes arrays based upon number of devices
+ * detected */
+ void (*finish)(void); /* Perform initialization after attachment */
+ int (*attach)(Scsi_Device *); /* Attach devices to arrays */
+ void (*detach)(Scsi_Device *);
+ int (*init_command)(Scsi_Cmnd *); /* Used by new queueing code.
+ Selects command for blkdevs */
+};
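+
+/*
+ * Illustrative sketch (not part of the original header): the shape of a
+ * device template as an upper-level driver might declare it. All names
+ * and numbers below are hypothetical.
+ */
+#if 0
+static struct Scsi_Device_Template example_sdt = {
+    name: "example disk",
+    tag: "xd",                          /* hypothetical */
+    scsi_type: 0,                       /* TYPE_DISK in scsi.h */
+    major: 240,                         /* made-up major number */
+    blk: 1,                             /* block device */
+    detect: example_dev_detect,         /* hypothetical */
+    init: example_dev_init,             /* hypothetical */
+    attach: example_dev_attach,         /* hypothetical */
+    detach: example_dev_detach,         /* hypothetical */
+};
+#endif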
+
+void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt);
+
+int scsi_register_device(struct Scsi_Device_Template * sdpnt);
+void scsi_deregister_device(struct Scsi_Device_Template * tpnt);
+
+#if 0
+/* These are used by loadable modules */
+extern int scsi_register_module(int, void *);
+extern int scsi_unregister_module(int, void *);
+#endif
+
+/* The different types of modules that we can load and unload */
+#define MODULE_SCSI_HA 1
+#define MODULE_SCSI_CONST 2
+#define MODULE_SCSI_IOCTL 3
+#define MODULE_SCSI_DEV 4
+
+
+/*
+ * This is an ugly hack. If we expect to be able to load devices at run time,
+ * we need to leave extra room in some of the data structures. Doing a
+ * realloc to enlarge the structures would be riddled with race conditions,
+ * so until a better solution is discovered, we use this crude approach
+ *
+ * Even bigger hack for SparcSTORAGE arrays. Those are at least 6 disks, but
+ * usually up to 30 disks, so everyone would need to change this. -jj
+ *
+ * Note: These things are all evil and all need to go away. My plan is to
+ * tackle the character devices first, as there aren't any locking implications
+ * in the block device layer. The block devices will require more work.
+ *
+ * The generic driver has been updated to resize as required. So has the tape
+ * driver. Two down, two more to go.
+ */
+#ifndef CONFIG_SD_EXTRA_DEVS
+#define CONFIG_SD_EXTRA_DEVS 2
+#endif
+#ifndef CONFIG_SR_EXTRA_DEVS
+#define CONFIG_SR_EXTRA_DEVS 2
+#endif
+#define SD_EXTRA_DEVS CONFIG_SD_EXTRA_DEVS
+#define SR_EXTRA_DEVS CONFIG_SR_EXTRA_DEVS
+
+#endif
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi.c b/xen/drivers/scsi/scsi.c
new file mode 100644
index 0000000000..85a59f54ac
--- /dev/null
+++ b/xen/drivers/scsi/scsi.c
@@ -0,0 +1,2999 @@
+/*
+ * scsi.c Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale
+ *
+ * generic mid-level SCSI driver
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Bug correction thanks go to :
+ * Rik Faith <faith@cs.unc.edu>
+ * Tommy Thorn <tthorn>
+ * Thomas Wuensche <tw@fgb1.fgb.mw.tu-muenchen.de>
+ *
+ * Modified by Eric Youngdale eric@andante.org or ericy@gnu.ai.mit.edu to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ *
+ * Native multichannel, wide scsi, /proc/scsi and hot plugging
+ * support added by Michael Neuffer <mike@i-connect.net>
+ *
+ * Added request_module("scsi_hostadapter") for kerneld:
+ * (Put an "alias scsi_hostadapter your_hostadapter" in /etc/modules.conf)
+ * Bjorn Ekwall <bj0rn@blox.se>
+ * (changed to kmod)
+ *
+ * Major improvements to the timeout, abort, and reset processing,
+ * as well as performance modifications for large queue depths by
+ * Leonard N. Zubkoff <lnz@dandelion.com>
+ *
+ * Converted cli() code to spinlocks, Ingo Molnar
+ *
+ * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli
+ *
+ * out_of_space hacks, D. Gilbert (dpg) 990608
+ */
+
+#define REVISION "Revision: 1.00"
+#define VERSION "Id: scsi.c 1.00 2000/09/26"
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+#include <xeno/lib.h>
+#include <xeno/slab.h>
+#include <xeno/ioport.h>
+/*#include <xeno/stat.h>*/
+#include <xeno/blk.h>
+#include <xeno/interrupt.h>
+#include <xeno/delay.h>
+#include <xeno/init.h>
+/*#include <xeno/smp_lock.h>*/
+/*#include <xeno/completion.h>*/
+
+#define __KERNEL_SYSCALLS__
+
+/*#include <xeno/unistd.h>*/
+#include <xeno/spinlock.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+#include <asm/uaccess.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <xeno/kmod.h>
+#endif
+
+#undef USE_STATIC_SCSI_MEMORY
+
+struct proc_dir_entry *proc_scsi;
+
+#ifdef CONFIG_PROC_FS
+static int scsi_proc_info(char *buffer, char **start, off_t offset, int length);
+static void scsi_dump_status(int level);
+#endif
+
+/*
+ static const char RCSid[] = "$Header: /vger/u4/cvs/linux/drivers/scsi/scsi.c,v 1.38 1997/01/19 23:07:18 davem Exp $";
+ */
+
+/*
+ * Definitions and constants.
+ */
+
+#define MIN_RESET_DELAY (2*HZ)
+
+/* Do not call reset on error if we just did a reset within 15 sec. */
+#define MIN_RESET_PERIOD (15*HZ)
+
+/*
+ * Macro to determine the size of a SCSI command. This macro takes vendor
+ * unique commands into account. SCSI commands in groups 6 and 7 are
+ * vendor unique and we will depend upon the command length being
+ * supplied correctly in cmd_len.
+ */
+#define CDB_SIZE(SCpnt) ((((SCpnt->cmnd[0] >> 5) & 7) < 6) ? \
+ COMMAND_SIZE(SCpnt->cmnd[0]) : SCpnt->cmd_len)
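+
+/*
+ * Worked illustration (not in the original source), assuming the usual
+ * COMMAND_SIZE(op) = scsi_command_size[((op) >> 5) & 7] from scsi.h:
+ * for READ(10), opcode 0x28, the group is (0x28 >> 5) & 7 = 1, which is
+ * below 6, so CDB_SIZE() yields scsi_command_size[1] = 10 (see the
+ * array below). For a group-6 vendor opcode such as 0xC1 the test
+ * fails and the driver-supplied cmd_len is used instead.
+ */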
+
+/*
+ * Data declarations.
+ */
+unsigned long scsi_pid;
+Scsi_Cmnd *last_cmnd;
+/* Command group 3 is reserved and should never be used. */
+const unsigned char scsi_command_size[8] =
+{
+ 6, 10, 10, 12,
+ 16, 12, 10, 10
+};
+static unsigned long serial_number;
+static Scsi_Cmnd *scsi_bh_queue_head;
+static Scsi_Cmnd *scsi_bh_queue_tail;
+
+/*
+ * Note - the initial logging level can be set here to log events at boot time.
+ * After the system is up, you may enable logging via the /proc interface.
+ */
+unsigned int scsi_logging_level;
+
+const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE] =
+{
+ "Direct-Access ",
+ "Sequential-Access",
+ "Printer ",
+ "Processor ",
+ "WORM ",
+ "CD-ROM ",
+ "Scanner ",
+ "Optical Device ",
+ "Medium Changer ",
+ "Communications ",
+ "Unknown ",
+ "Unknown ",
+ "Unknown ",
+ "Enclosure ",
+};
+
+/*
+ * Function prototypes.
+ */
+extern void scsi_times_out(Scsi_Cmnd * SCpnt);
+void scsi_build_commandblocks(Scsi_Device * SDpnt);
+
+#if 0
+/*
+ * These are the interface to the old error handling code. It should go away
+ * someday soon.
+ */
+extern void scsi_old_done(Scsi_Cmnd * SCpnt);
+extern void scsi_old_times_out(Scsi_Cmnd * SCpnt);
+extern int scsi_old_reset(Scsi_Cmnd *SCpnt, unsigned int flag);
+#endif
+
+/*
+ * Private interface into the new error handling code.
+ */
+extern int scsi_new_reset(Scsi_Cmnd *SCpnt, unsigned int flag);
+
+/*
+ * Function: scsi_initialize_queue()
+ *
+ * Purpose: Selects queue handler function for a device.
+ *
+ * Arguments: SDpnt - device for which we need a handler function.
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locking assumed or required.
+ *
+ * Notes: Most devices will end up using scsi_request_fn for the
+ * handler function (at least as things are done now).
+ * The "block" feature basically ensures that only one of
+ * the blocked hosts is active at one time, mainly to work around
+ * buggy DMA chipsets where the memory gets starved.
+ * For this case, we have a special handler function, which
+ * does some checks and ultimately calls scsi_request_fn.
+ *
+ * The single_lun feature is a similar special case.
+ *
+ * We handle these things by stacking the handlers. The
+ * special case handlers simply check a few conditions,
+ * and return if they are not supposed to do anything.
+ * In the event that things are OK, then they call the next
+ * handler in the list - ultimately they call scsi_request_fn
+ * to do the dirty deed.
+ */
+void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) {
+ blk_init_queue(&SDpnt->request_queue, scsi_request_fn);
+ blk_queue_headactive(&SDpnt->request_queue, 0);
+ SDpnt->request_queue.queuedata = (void *) SDpnt;
+}
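+
+/*
+ * Sketch of the handler stacking described above (illustrative only;
+ * the real special-case handlers live elsewhere in the mid-layer). A
+ * wrapper checks its one condition and either returns or falls through
+ * to the next handler down.
+ */
+#if 0
+static void example_blocked_request_fn(request_queue_t * q)
+{
+    Scsi_Device *SDpnt = (Scsi_Device *) q->queuedata;
+
+    if (SDpnt->host->host_blocked)
+        return;             /* not our turn: do nothing this pass */
+    scsi_request_fn(q);     /* otherwise call the next handler down */
+}
+#endif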
+
+#ifdef MODULE
+MODULE_PARM(scsi_logging_level, "i");
+MODULE_PARM_DESC(scsi_logging_level, "SCSI logging level; should be zero or nonzero");
+
+#else
+static int __init scsi_logging_setup(char *str)
+{
+#if 0
+ int tmp;
+
+ if (get_option(&str, &tmp) == 1) {
+ scsi_logging_level = (tmp ? ~0 : 0);
+ return 1;
+ } else {
+ printk(KERN_INFO "scsi_logging_setup : usage scsi_logging_level=n "
+ "(n should be 0 or non-zero)\n");
+ return 0;
+ }
+#else
+ return 0;
+#endif
+
+}
+__setup("scsi_logging=", scsi_logging_setup);
+
+#endif
+
+/*
+ * Issue a command and wait for it to complete
+ */
+
+static void scsi_wait_done(Scsi_Cmnd * SCpnt)
+{
+ struct request *req;
+
+ req = &SCpnt->request;
+ req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */
+
+#if 0
+ if (req->waiting != NULL) {
+ complete(req->waiting);
+ }
+#endif
+}
+
+/*
+ * This lock protects the freelist for all devices on the system.
+ * We could make this finer grained by having a single lock per
+ * device if it is ever found that there is excessive contention
+ * on this lock.
+ */
+static spinlock_t device_request_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Used to protect insertion into and removal from the queue of
+ * commands to be processed by the bottom half handler.
+ */
+static spinlock_t scsi_bhqueue_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Function: scsi_allocate_request
+ *
+ * Purpose: Allocate a request descriptor.
+ *
+ * Arguments: device - device for which we want a request
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Pointer to request block.
+ *
+ * Notes: With the new queueing code, it becomes important
+ * to track the difference between a command and a
+ * request. A request is a pending item in the queue that
+ * has not yet reached the top of the queue.
+ */
+
+Scsi_Request *scsi_allocate_request(Scsi_Device * device)
+{
+ Scsi_Request *SRpnt = NULL;
+
+ if (!device)
+ panic("No device passed to scsi_allocate_request().\n");
+
+ SRpnt = (Scsi_Request *) kmalloc(sizeof(Scsi_Request), GFP_ATOMIC);
+ if( SRpnt == NULL )
+ {
+ return NULL;
+ }
+
+ memset(SRpnt, 0, sizeof(Scsi_Request));
+ SRpnt->sr_device = device;
+ SRpnt->sr_host = device->host;
+ SRpnt->sr_magic = SCSI_REQ_MAGIC;
+ SRpnt->sr_data_direction = SCSI_DATA_UNKNOWN;
+
+ return SRpnt;
+}
+
+/*
+ * Function: scsi_release_request
+ *
+ * Purpose: Release a request descriptor.
+ *
+ * Arguments: device - device for which we want a request
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Pointer to request block.
+ *
+ * Notes: With the new queueing code, it becomes important
+ * to track the difference between a command and a
+ * request. A request is a pending item in the queue that
+ * has not yet reached the top of the queue. We still need
+ * to free a request when we are done with it, of course.
+ */
+void scsi_release_request(Scsi_Request * req)
+{
+ if( req->sr_command != NULL )
+ {
+#ifdef SMH_DEBUG
+ printk("scsi_release_request: req->sr_command = %p\n",
+ req->sr_command);
+#endif
+ scsi_release_command(req->sr_command);
+ req->sr_command = NULL;
+#ifdef SMHHACK
+ req->freeaddr = 0x1234;
+#endif
+ }
+
+ kfree(req);
+}
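+
+/*
+ * Usage sketch (illustrative, not from the original file): the
+ * allocate/release pairing an upper-level driver would use around a
+ * request. Issuing the command is elided.
+ */
+#if 0
+static int example_do_request(Scsi_Device *SDpnt)
+{
+    Scsi_Request *SRpnt;
+
+    SRpnt = scsi_allocate_request(SDpnt);
+    if (SRpnt == NULL)
+        return -1;          /* allocation failed */
+    SRpnt->sr_data_direction = SCSI_DATA_NONE;  /* e.g. TEST UNIT READY */
+    /* ... fill in the CDB and issue the request here ... */
+    scsi_release_request(SRpnt);
+    return 0;
+}
+#endif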
+
+/*
+ * Function: scsi_allocate_device
+ *
+ * Purpose: Allocate a command descriptor.
+ *
+ * Arguments: device - device for which we want a command descriptor
+ * wait - 1 if we should wait in the event that none
+ * are available.
+ * interruptable - 1 if we should unblock and return NULL
+ * in the event that we must wait, and a signal
+ * arrives.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Pointer to command descriptor.
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ *
+ * If the wait flag is true, and we are waiting for a free
+ * command block, this function will interrupt and return
+ * NULL in the event that a signal arrives that needs to
+ * be handled.
+ *
+ * This function is deprecated, and drivers should be
+ * rewritten to use Scsi_Request instead of Scsi_Cmnd.
+ */
+
+Scsi_Cmnd *scsi_allocate_device(Scsi_Device * device, int wait,
+ int interruptable)
+{
+ struct Scsi_Host *host;
+ Scsi_Cmnd *SCpnt = NULL;
+ Scsi_Device *SDpnt;
+ unsigned long flags;
+
+ if (!device)
+ panic("No device passed to scsi_allocate_device().\n");
+
+ host = device->host;
+
+ spin_lock_irqsave(&device_request_lock, flags);
+
+ while (1 == 1) {
+ SCpnt = NULL;
+ if (!device->device_blocked) {
+ if (device->single_lun) {
+ /*
+ * FIXME(eric) - this is not at all optimal. Given that
+ * single lun devices are rare and usually slow
+ * (i.e. CD changers), this is good enough for now, but
+ * we may want to come back and optimize this later.
+ *
+ * Scan through all of the devices attached to this
+ * host, and see if any are active or not. If so,
+ * we need to defer this command.
+ *
+ * We really need a busy counter per device. This would
+ * allow us to more easily figure out whether we should
+ * do anything here or not.
+ */
+ for (SDpnt = host->host_queue;
+ SDpnt;
+ SDpnt = SDpnt->next) {
+ /*
+ * Only look for other devices on the same bus
+ * with the same target ID.
+ */
+ if (SDpnt->channel != device->channel
+ || SDpnt->id != device->id
+ || SDpnt == device) {
+ continue;
+ }
+ if( atomic_read(&SDpnt->device_active) != 0)
+ {
+ break;
+ }
+ }
+ if (SDpnt) {
+ /*
+ * Some other device in this cluster is busy.
+ * If asked to wait, we need to wait, otherwise
+ * return NULL.
+ */
+ SCpnt = NULL;
+ goto busy;
+ }
+ }
+ /*
+ * Now we can check for a free command block for this device.
+ */
+ for (SCpnt = device->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->request.rq_status == RQ_INACTIVE)
+ break;
+ }
+ }
+ /*
+ * If we couldn't find a free command block, and we have been
+ * asked to wait, then do so.
+ */
+ if (SCpnt) {
+ break;
+ }
+ busy:
+ /*
+ * If we have been asked to wait for a free block, then
+ * wait here.
+ */
+ if (wait) {
+ printk("XXX smh: scsi cannot wait for free cmd block.\n");
+ BUG();
+#if 0
+ DECLARE_WAITQUEUE(wait, current);
+
+ /*
+ * We need to wait for a free commandblock. We need to
+ * insert ourselves into the list before we release the
+ * lock. This way if a block were released the same
+ * microsecond that we released the lock, the call
+ * to schedule() wouldn't block (well, it might switch,
+ * but the current task will still be schedulable).
+ */
+ add_wait_queue(&device->scpnt_wait, &wait);
+ if( interruptable ) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ } else {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+
+ spin_unlock_irqrestore(&device_request_lock, flags);
+
+ /*
+ * This should block until a device command block
+ * becomes available.
+ */
+ schedule();
+
+ spin_lock_irqsave(&device_request_lock, flags);
+
+ remove_wait_queue(&device->scpnt_wait, &wait);
+ /*
+ * FIXME - Isn't this redundant?? Someone
+ * else will have forced the state back to running.
+ */
+ set_current_state(TASK_RUNNING);
+ /*
+ * In the event that a signal has arrived that we need
+ * to consider, then simply return NULL. Everyone
+ * that calls us should be prepared for this
+ * possibility, and pass the appropriate code back
+ * to the user.
+ */
+ if( interruptable ) {
+ if (signal_pending(current)) {
+ spin_unlock_irqrestore(&device_request_lock, flags);
+ return NULL;
+ }
+ }
+#endif
+ } else {
+ spin_unlock_irqrestore(&device_request_lock, flags);
+ return NULL;
+ }
+ }
+
+ SCpnt->request.rq_status = RQ_SCSI_BUSY;
+ SCpnt->request.waiting = NULL; /* And no one is waiting for this
+ * to complete */
+ atomic_inc(&SCpnt->host->host_active);
+ atomic_inc(&SCpnt->device->device_active);
+
+ SCpnt->buffer = NULL;
+ SCpnt->bufflen = 0;
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+
+ SCpnt->use_sg = 0; /* Reset the scatter-gather flag */
+ SCpnt->old_use_sg = 0;
+ SCpnt->transfersize = 0; /* No default transfer size */
+ SCpnt->cmd_len = 0;
+
+ SCpnt->sc_data_direction = SCSI_DATA_UNKNOWN;
+ SCpnt->sc_request = NULL;
+ SCpnt->sc_magic = SCSI_CMND_MAGIC;
+
+ SCpnt->result = 0;
+ SCpnt->underflow = 0; /* Do not flag underflow conditions */
+ SCpnt->old_underflow = 0;
+ SCpnt->resid = 0;
+ SCpnt->state = SCSI_STATE_INITIALIZING;
+ SCpnt->owner = SCSI_OWNER_HIGHLEVEL;
+
+ spin_unlock_irqrestore(&device_request_lock, flags);
+
+ SCSI_LOG_MLQUEUE(5, printk("Activating command for device %d (%d)\n",
+ SCpnt->target,
+ atomic_read(&SCpnt->host->host_active)));
+
+ return SCpnt;
+}
+
+inline void __scsi_release_command(Scsi_Cmnd * SCpnt)
+{
+ unsigned long flags;
+ Scsi_Device * SDpnt;
+
+ spin_lock_irqsave(&device_request_lock, flags);
+
+ SDpnt = SCpnt->device;
+
+ SCpnt->request.rq_status = RQ_INACTIVE;
+ SCpnt->state = SCSI_STATE_UNUSED;
+ SCpnt->owner = SCSI_OWNER_NOBODY;
+ atomic_dec(&SCpnt->host->host_active);
+ atomic_dec(&SDpnt->device_active);
+
+ SCSI_LOG_MLQUEUE(5, printk("Deactivating command for device %d (active=%d, failed=%d)\n",
+ SCpnt->target,
+ atomic_read(&SCpnt->host->host_active),
+ SCpnt->host->host_failed));
+ if (SCpnt->host->host_failed != 0) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Error handler thread %d %d\n",
+ SCpnt->host->in_recovery,
+ SCpnt->host->eh_active));
+ }
+ /*
+ * If the host is having troubles, then look to see if this was the last
+ * command that might have failed. If so, wake up the error handler.
+ */
+ if (SCpnt->host->in_recovery
+ && !SCpnt->host->eh_active
+ && SCpnt->host->host_busy == SCpnt->host->host_failed) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n",
+ atomic_read(&SCpnt->host->eh_wait->count)));
+#if 0
+ up(SCpnt->host->eh_wait);
+#endif
+ }
+
+ spin_unlock_irqrestore(&device_request_lock, flags);
+
+#if 0
+ /*
+ * Wake up anyone waiting for this device. Do this after we
+ * have released the lock, as they will need it as soon as
+ * they wake up.
+ */
+ wake_up(&SDpnt->scpnt_wait);
+#endif
+
+}
+
+/*
+ * Function: scsi_release_command
+ *
+ * Purpose: Release a command block.
+ *
+ * Arguments: SCpnt - command block we are releasing.
+ *
+ * Notes: The command block can no longer be used by the caller once
+ * this function is called. This is in effect the inverse
+ * of scsi_allocate_device. Note that we also must perform
+ * a couple of additional tasks. We must first wake up any
+ * processes that might have blocked waiting for a command
+ * block, and secondly we must hit the queue handler function
+ * to make sure that the device is busy. Note - there is an
+ * option to not do this - there were instances where we could
+ * recurse too deeply and blow the stack if this happened
+ * when we were indirectly called from the request function
+ * itself.
+ *
+ * The idea is that a lot of the mid-level internals gunk
+ * gets hidden in this function. Upper level drivers don't
+ * have any chickens to wave in the air to get things to
+ * work reliably.
+ *
+ * This function is deprecated, and drivers should be
+ * rewritten to use Scsi_Request instead of Scsi_Cmnd.
+ */
+void scsi_release_command(Scsi_Cmnd * SCpnt)
+{
+ request_queue_t *q;
+ Scsi_Device * SDpnt;
+
+ SDpnt = SCpnt->device;
+
+ __scsi_release_command(SCpnt);
+
+ /*
+ * Finally, hit the queue request function to make sure that
+ * the device is actually busy if there are requests present.
+ * This won't block - if the device cannot take any more, life
+ * will go on.
+ */
+ q = &SDpnt->request_queue;
+ scsi_queue_next_request(q, NULL);
+}
+
+/*
+ * Function: scsi_dispatch_command
+ *
+ * Purpose: Dispatch a command to the low-level driver.
+ *
+ * Arguments: SCpnt - command block we are dispatching.
+ *
+ * Notes:
+ */
+int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt)
+{
+#ifdef DEBUG_DELAY
+ unsigned long clock;
+#endif
+ struct Scsi_Host *host;
+ int rtn = 0;
+ unsigned long flags = 0;
+ unsigned long timeout;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+#if DEBUG
+ unsigned long *ret = 0;
+#ifdef __mips__
+ __asm__ __volatile__("move\t%0,$31":"=r"(ret));
+#else
+ ret = __builtin_return_address(0);
+#endif
+#endif
+
+ host = SCpnt->host;
+
+ /* Assign a unique nonzero serial_number. */
+ if (++serial_number == 0)
+ serial_number = 1;
+ SCpnt->serial_number = serial_number;
+ SCpnt->pid = scsi_pid++;
+
+ /*
+ * We will wait MIN_RESET_DELAY clock ticks after the last reset so
+ * we can avoid the drive not being ready.
+ */
+ timeout = host->last_reset + MIN_RESET_DELAY;
+
+ if (host->resetting && time_before(jiffies, timeout)) {
+ int ticks_remaining = timeout - jiffies;
+ /*
+ * NOTE: This may be executed from within an interrupt
+ * handler! This is bad, but for now, it'll do. The irq
+ * level of the interrupt handler has been masked out by the
+ * platform dependent interrupt handling code already, so the
+ * sti() here will not cause another call to the SCSI host's
+ * interrupt handler (assuming there is one irq-level per
+ * host).
+ */
+ while (--ticks_remaining >= 0)
+ mdelay(1 + 999 / HZ);
+ host->resetting = 0;
+ }
+ if (host->hostt->use_new_eh_code) {
+ scsi_add_timer(SCpnt, SCpnt->timeout_per_command, scsi_times_out);
+ } else {
+#if 0
+ scsi_add_timer(SCpnt, SCpnt->timeout_per_command,
+ scsi_old_times_out);
+#endif
+ }
+
+ /*
+ * We will use a queued command if possible, otherwise we will emulate the
+ * queuing and the calling of the completion function ourselves.
+ */
+ SCSI_LOG_MLQUEUE(3, printk("scsi_dispatch_cmnd (host = %d, channel = %d, target = %d, "
+ "command = %p, buffer = %p, \nbufflen = %d, done = %p)\n",
+ SCpnt->host->host_no, SCpnt->channel, SCpnt->target, SCpnt->cmnd,
+ SCpnt->buffer, SCpnt->bufflen, SCpnt->done));
+
+ SCpnt->state = SCSI_STATE_QUEUED;
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+ if (host->can_queue) {
+ SCSI_LOG_MLQUEUE(3, printk("queuecommand : routine at %p\n",
+ host->hostt->queuecommand));
+ /*
+ * Use the old error handling code if we haven't converted the driver
+ * to use the new one yet. Note - only the new queuecommand variant
+ * passes a meaningful return value.
+ */
+ if (host->hostt->use_new_eh_code) {
+ /*
+ * Before we queue this command, check if the command
+ * length exceeds what the host adapter can handle.
+ */
+ if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) {
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = host->hostt->queuecommand(SCpnt, scsi_done);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ if (rtn != 0) {
+ scsi_delete_timer(SCpnt);
+ scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY);
+ SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n"));
+ }
+ } else {
+ SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n"));
+ SCpnt->result = (DID_ABORT << 16);
+ spin_lock_irqsave(&io_request_lock, flags);
+ scsi_done(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ rtn = 1;
+ }
+ } else {
+ /*
+ * Before we queue this command, check if the command
+ * length exceeds what the host adapter can handle.
+ */
+#if 0
+ if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) {
+ spin_lock_irqsave(&io_request_lock, flags);
+ host->hostt->queuecommand(SCpnt, scsi_old_done);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ } else {
+ SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n"));
+ SCpnt->result = (DID_ABORT << 16);
+ spin_lock_irqsave(&io_request_lock, flags);
+ scsi_old_done(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ rtn = 1;
+ }
+#endif
+
+ }
+ } else {
+ int temp;
+
+ SCSI_LOG_MLQUEUE(3, printk("command() : routine at %p\n", host->hostt->command));
+ spin_lock_irqsave(&io_request_lock, flags);
+ temp = host->hostt->command(SCpnt);
+ SCpnt->result = temp;
+#ifdef DEBUG_DELAY
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ clock = jiffies + 4 * HZ;
+ while (time_before(jiffies, clock)) {
+ barrier();
+ cpu_relax();
+ }
+ printk("done(host = %d, result = %04x) : routine at %p\n",
+ host->host_no, temp, host->hostt->command);
+ spin_lock_irqsave(&io_request_lock, flags);
+#endif
+ if (host->hostt->use_new_eh_code) {
+ scsi_done(SCpnt);
+ } else {
+#if 0
+ scsi_old_done(SCpnt);
+#endif
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ }
+ SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n"));
+ return rtn;
+}
+
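+/*
+ * Illustrative sketch (not built): the contract that scsi_dispatch_cmd()
+ * relies on. A low-level driver's queuecommand() accepts the command,
+ * returns 0, and later completes it by invoking the done() callback
+ * (scsi_done here) exactly once, typically from its interrupt handler.
+ * The mydriver_* name is hypothetical.
+ */
+#if 0
+static int mydriver_queuecommand(Scsi_Cmnd * SCpnt,
+ void (*done) (Scsi_Cmnd *))
+{
+ /* Hand the command to the hardware here; for brevity this sketch
+ * completes it immediately from queuecommand itself. */
+ SCpnt->result = DID_OK << 16;
+ done(SCpnt); /* must be called exactly once */
+ return 0; /* 0 == command accepted */
+}
+#endif
+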
+#ifdef DEVFS_MUST_DIE
+devfs_handle_t scsi_devfs_handle;
+#endif
+
+/*
+ * scsi_do_cmd sends all the commands out to the low-level driver. It
+ * handles the specifics required for each low level driver - ie queued
+ * or non queued. It also prevents conflicts when different high level
+ * drivers go for the same host at the same time.
+ */
+
+void scsi_wait_req (Scsi_Request * SRpnt, const void *cmnd ,
+ void *buffer, unsigned bufflen,
+ int timeout, int retries)
+{
+#if 0
+ DECLARE_COMPLETION(wait);
+#endif
+
+ request_queue_t *q = &SRpnt->sr_device->request_queue;
+
+#if 0
+ SRpnt->sr_request.waiting = &wait;
+#endif
+
+ SRpnt->sr_request.rq_status = RQ_SCSI_BUSY;
+ scsi_do_req (SRpnt, (void *) cmnd,
+ buffer, bufflen, scsi_wait_done, timeout, retries);
+ generic_unplug_device(q);
+
+#if 0
+ wait_for_completion(&wait);
+#endif
+
+ /* XXX SMH: in the 'standard' driver everything is considered ok at
+ this point since we've waited on &wait -- hence we deallocate the
+ command structure if that hasn't been done already. This is not
+ the correct behaviour in xen ... hmm ... how to fix? */
+ mdelay(500);
+
+ SRpnt->sr_request.waiting = NULL;
+
+ if( SRpnt->sr_command != NULL )
+ {
+#ifdef SMH_DEBUG
+ printk("scsi_wait_req: releasing SRpnt->sr_command = %p\n",
+ SRpnt->sr_command);
+#endif
+ scsi_release_command(SRpnt->sr_command);
+ SRpnt->sr_command = NULL;
+#ifdef SMHHACK
+ SRpnt->freeaddr = 0x99991234;
+#endif
+ }
+}
+
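+/*
+ * Usage sketch (illustrative only): how a caller might drive
+ * scsi_wait_req() for a command with no data phase. This assumes a
+ * stock-2.4-style scsi_allocate_request()/scsi_release_request() pair
+ * is available in this tree; example_test_unit_ready is a hypothetical
+ * name.
+ */
+#if 0
+static int example_test_unit_ready(Scsi_Device * SDpnt)
+{
+ unsigned char cmd[6] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
+ Scsi_Request *SRpnt = scsi_allocate_request(SDpnt);
+ int result;
+
+ if (SRpnt == NULL)
+ return -ENOMEM;
+ SRpnt->sr_data_direction = SCSI_DATA_NONE;
+ /* No data phase: buffer == NULL, bufflen == 0; 5s timeout, 3 retries. */
+ scsi_wait_req(SRpnt, (void *) cmd, NULL, 0, 5 * HZ, 3);
+ result = SRpnt->sr_result;
+ scsi_release_request(SRpnt);
+ return result;
+}
+#endif
+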
+/*
+ * Function: scsi_do_req
+ *
+ * Purpose: Queue a SCSI request
+ *
+ * Arguments: SRpnt - command descriptor.
+ * cmnd - actual SCSI command to be performed.
+ * buffer - data buffer.
+ * bufflen - size of data buffer.
+ * done - completion function to be run.
+ * timeout - how long to let it run before timeout.
+ * retries - number of retries we allow.
+ *
+ * Lock status: With the new queueing code, this is SMP-safe, and no locks
+ * need be held upon entry. With the old queueing code, the lock
+ * was assumed to be held upon entry.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ * Also, this function is now only used for queueing requests
+ * for things like ioctls and character device requests - this
+ * is because we essentially just inject a request into the
+ * queue for the device. Normal block device handling manipulates
+ * the queue directly.
+ */
+void scsi_do_req(Scsi_Request * SRpnt, const void *cmnd,
+ void *buffer, unsigned bufflen, void (*done) (Scsi_Cmnd *),
+ int timeout, int retries)
+{
+ Scsi_Device * SDpnt = SRpnt->sr_device;
+ struct Scsi_Host *host = SDpnt->host;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCSI_LOG_MLQUEUE(4,
+ {
+ int i;
+ int target = SDpnt->id;
+ int size = COMMAND_SIZE(((const unsigned char *)cmnd)[0]);
+ printk("scsi_do_req (host = %d, channel = %d target = %d, "
+ "buffer =%p, bufflen = %d, done = %p, timeout = %d, "
+ "retries = %d)\n"
+ "command : ", host->host_no, SDpnt->channel, target, buffer,
+ bufflen, done, timeout, retries);
+ for (i = 0; i < size; ++i)
+ printk("%02x ", ((unsigned char *) cmnd)[i]);
+ printk("\n");
+ });
+
+ if (!host) {
+ panic("Invalid or not present host.\n");
+ }
+
+ /*
+ * If the upper level driver is reusing these things, then
+ * we should release the low-level block now. Another one will
+ * be allocated later when this request is getting queued.
+ */
+ if( SRpnt->sr_command != NULL )
+ {
+#ifdef SMH_DEBUG
+ printk("scsi_do_req: releasing SRpnt->sr_command = %p\n",
+ SRpnt->sr_command);
+#endif
+ scsi_release_command(SRpnt->sr_command);
+ SRpnt->sr_command = NULL;
+#ifdef SMHHACK
+ SRpnt->freeaddr = 0xabbadead;
+#endif
+ }
+
+ /*
+ * We must prevent reentrancy to the lowlevel host driver.
+ * This prevents it - we enter a loop until the host we want
+ * to talk to is not busy. Race conditions are prevented, as
+ * interrupts are disabled in between the time we check for
+ * the host being not busy, and the time we mark it busy
+ * ourselves. */
+
+ /*
+ * Our own function scsi_done (which marks the host as not
+ * busy, disables the timeout counter, etc) will be called
+ * either by us or by the scsi_hosts[host].queuecommand()
+ * function; scsi_done in turn calls the completion function
+ * of the high level driver. */
+
+ memcpy((void *) SRpnt->sr_cmnd, (const void *) cmnd,
+ sizeof(SRpnt->sr_cmnd));
+#ifdef SMHHACK
+ SRpnt->freeaddr = 0x1111;
+#endif
+
+ SRpnt->sr_bufflen = bufflen;
+ SRpnt->sr_buffer = buffer;
+ SRpnt->sr_allowed = retries;
+ SRpnt->sr_done = done;
+ SRpnt->sr_timeout_per_command = timeout;
+
+ if (SRpnt->sr_cmd_len == 0)
+ SRpnt->sr_cmd_len = COMMAND_SIZE(SRpnt->sr_cmnd[0]);
+
+ /*
+ * At this point, we merely set up the command, stick it in the normal
+ * request queue, and return. Eventually that request will come to the
+ * top of the list, and will be dispatched.
+ */
+ scsi_insert_special_req(SRpnt, 0);
+
+ SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_req()\n"));
+}
+
+/*
+ * Function: scsi_init_cmd_from_req
+ *
+ * Purpose: Initialize a Scsi_Cmnd from a Scsi_Request
+ *
+ * Arguments: SCpnt - command descriptor.
+ * SRpnt - Request from the queue.
+ *
+ * Lock status: None needed.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: Mainly transfer data from the request structure to the
+ * command structure. The request structure is allocated
+ * using the normal memory allocator, and requests can pile
+ * up to more or less any depth. The command structure represents
+ * a consumable resource, as these are allocated into a pool
+ * when the SCSI subsystem initializes. The preallocation is
+ * required so that in low-memory situations a disk I/O request
+ * won't cause the memory manager to try and write out a page.
+ * The request structure is generally used by ioctls and character
+ * devices.
+ */
+void scsi_init_cmd_from_req(Scsi_Cmnd * SCpnt, Scsi_Request * SRpnt)
+{
+ struct Scsi_Host *host = SCpnt->host;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+ SRpnt->sr_command = SCpnt;
+#ifdef SMH_DEBUG
+ printk("scsi_init_cmd_from_req: SRpnt = %p, SRpnt->sr_command = %p\n",
+ SRpnt, SRpnt->sr_command);
+#endif
+
+ if (!host) {
+ panic("Invalid or not present host.\n");
+ }
+
+ SCpnt->cmd_len = SRpnt->sr_cmd_len;
+ SCpnt->use_sg = SRpnt->sr_use_sg;
+
+ memcpy((void *) &SCpnt->request, (const void *) &SRpnt->sr_request,
+ sizeof(SRpnt->sr_request));
+ memcpy((void *) SCpnt->data_cmnd, (const void *) SRpnt->sr_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->reset_chain = NULL;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->bufflen = SRpnt->sr_bufflen;
+ SCpnt->buffer = SRpnt->sr_buffer;
+ SCpnt->flags = 0;
+ SCpnt->retries = 0;
+ SCpnt->allowed = SRpnt->sr_allowed;
+ SCpnt->done = SRpnt->sr_done;
+ SCpnt->timeout_per_command = SRpnt->sr_timeout_per_command;
+
+ SCpnt->sc_data_direction = SRpnt->sr_data_direction;
+
+ SCpnt->sglist_len = SRpnt->sr_sglist_len;
+ SCpnt->underflow = SRpnt->sr_underflow;
+
+ SCpnt->sc_request = SRpnt;
+
+ memcpy((void *) SCpnt->cmnd, (const void *) SRpnt->sr_cmnd,
+ sizeof(SCpnt->cmnd));
+ /* Zero the sense buffer. Some host adapters automatically request
+ * sense on error. 0 is not a valid sense code.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+ SCpnt->request_buffer = SRpnt->sr_buffer;
+ SCpnt->request_bufflen = SRpnt->sr_bufflen;
+ SCpnt->old_use_sg = SCpnt->use_sg;
+ if (SCpnt->cmd_len == 0)
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+ SCpnt->old_cmd_len = SCpnt->cmd_len;
+ SCpnt->sc_old_data_direction = SCpnt->sc_data_direction;
+ SCpnt->old_underflow = SCpnt->underflow;
+
+ /* Start the timer ticking. */
+
+ SCpnt->internal_timeout = NORMAL_TIMEOUT;
+ SCpnt->abort_reason = 0;
+ SCpnt->result = 0;
+
+ SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_init_cmd_from_req()\n"));
+}
+
+/*
+ * Function: scsi_do_cmd
+ *
+ * Purpose: Queue a SCSI command
+ *
+ * Arguments: SCpnt - command descriptor.
+ * cmnd - actual SCSI command to be performed.
+ * buffer - data buffer.
+ * bufflen - size of data buffer.
+ * done - completion function to be run.
+ * timeout - how long to let it run before timeout.
+ * retries - number of retries we allow.
+ *
+ * Lock status: With the new queueing code, this is SMP-safe, and no locks
+ * need be held upon entry. With the old queueing code, the lock
+ * was assumed to be held upon entry.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ * Also, this function is now only used for queueing requests
+ * for things like ioctls and character device requests - this
+ * is because we essentially just inject a request into the
+ * queue for the device. Normal block device handling manipulates
+ * the queue directly.
+ */
+void scsi_do_cmd(Scsi_Cmnd * SCpnt, const void *cmnd,
+ void *buffer, unsigned bufflen, void (*done) (Scsi_Cmnd *),
+ int timeout, int retries)
+{
+ struct Scsi_Host *host = SCpnt->host;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCpnt->pid = scsi_pid++;
+ SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+
+ SCSI_LOG_MLQUEUE(4,
+ {
+ int i;
+ int target = SCpnt->target;
+ int size = COMMAND_SIZE(((const unsigned char *)cmnd)[0]);
+ printk("scsi_do_cmd (host = %d, channel = %d target = %d, "
+ "buffer =%p, bufflen = %d, done = %p, timeout = %d, "
+ "retries = %d)\n"
+ "command : ", host->host_no, SCpnt->channel, target, buffer,
+ bufflen, done, timeout, retries);
+ for (i = 0; i < size; ++i)
+ printk("%02x ", ((unsigned char *) cmnd)[i]);
+ printk("\n");
+ });
+
+ if (!host) {
+ panic("Invalid or not present host.\n");
+ }
+ /*
+ * We must prevent reentrancy to the lowlevel host driver. This prevents
+ * it - we enter a loop until the host we want to talk to is not busy.
+ * Race conditions are prevented, as interrupts are disabled in between the
+ * time we check for the host being not busy, and the time we mark it busy
+ * ourselves.
+ */
+
+ /*
+ * Our own function scsi_done (which marks the host as not busy, disables
+ * the timeout counter, etc) will be called either by us or by the
+ * scsi_hosts[host].queuecommand() function; scsi_done in turn calls
+ * the completion function of the high level driver.
+ */
+
+ memcpy((void *) SCpnt->data_cmnd, (const void *) cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->reset_chain = NULL;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->bufflen = bufflen;
+ SCpnt->buffer = buffer;
+ SCpnt->flags = 0;
+ SCpnt->retries = 0;
+ SCpnt->allowed = retries;
+ SCpnt->done = done;
+ SCpnt->timeout_per_command = timeout;
+
+ memcpy((void *) SCpnt->cmnd, (const void *) cmnd,
+ sizeof(SCpnt->cmnd));
+ /* Zero the sense buffer. Some host adapters automatically request
+ * sense on error. 0 is not a valid sense code.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+ SCpnt->request_buffer = buffer;
+ SCpnt->request_bufflen = bufflen;
+ SCpnt->old_use_sg = SCpnt->use_sg;
+ if (SCpnt->cmd_len == 0)
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+ SCpnt->old_cmd_len = SCpnt->cmd_len;
+ SCpnt->sc_old_data_direction = SCpnt->sc_data_direction;
+ SCpnt->old_underflow = SCpnt->underflow;
+
+ /* Start the timer ticking. */
+
+ SCpnt->internal_timeout = NORMAL_TIMEOUT;
+ SCpnt->abort_reason = 0;
+ SCpnt->result = 0;
+
+ /*
+ * At this point, we merely set up the command, stick it in the normal
+ * request queue, and return. Eventually that request will come to the
+ * top of the list, and will be dispatched.
+ */
+ scsi_insert_special_cmd(SCpnt, 0);
+
+ SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_cmd()\n"));
+}
+
+/*
+ * This function is the mid-level interrupt routine, which decides how
+ * to handle error conditions. Each invocation of this function must
+ * do one and *only* one of the following:
+ *
+ * 1) Insert command in BH queue.
+ * 2) Activate error handler for host.
+ *
+ * FIXME(eric) - I am concerned about stack overflow (still). An
+ * interrupt could come while we are processing the bottom queue,
+ * which would cause another command to be stuffed onto the bottom
+ * queue, and it would in turn be processed as that interrupt handler
+ * is returning. Given a sufficiently steady rate of returning
+ * commands, this could cause the stack to overflow. I am not sure
+ * what is the most appropriate solution here - we should probably
+ * keep a depth count, and not process any commands while we still
+ * have a bottom handler active higher in the stack.
+ *
+ * There is currently code in the bottom half handler to monitor
+ * recursion in the bottom handler and report if it ever happens. If
+ * this becomes a problem, it won't be hard to engineer something to
+ * deal with it so that only the outer layer ever does any real
+ * processing.
+ */
+void scsi_done(Scsi_Cmnd * SCpnt)
+{
+ unsigned long flags;
+ int tstatus;
+
+ /*
+ * We don't have to worry about this one timing out any more.
+ */
+ tstatus = scsi_delete_timer(SCpnt);
+
+ /*
+ * If we are unable to remove the timer, it means that the command
+ * has already timed out. In this case, we have no choice but to
+ * let the timeout function run, as we have no idea where in fact
+ * that function could really be. It might be on another processor,
+ * etc, etc.
+ */
+ if (!tstatus) {
+ SCpnt->done_late = 1;
+ return;
+ }
+ /* Set the serial numbers back to zero */
+ SCpnt->serial_number = 0;
+
+ /*
+ * First, see whether this command already timed out. If so, we ignore
+ * the response. We treat it as if the command never finished.
+ *
+ * Since serial_number is now 0, the error handler could detect this
+ * situation and avoid calling the low level driver's abort routine.
+ * (DB)
+ *
+ * FIXME(eric) - I believe that this test is now redundant, due to
+ * the test of the return status of del_timer().
+ */
+ if (SCpnt->state == SCSI_STATE_TIMEOUT) {
+ SCSI_LOG_MLCOMPLETE(1, printk("Ignoring completion of %p due to timeout status", SCpnt));
+ return;
+ }
+ spin_lock_irqsave(&scsi_bhqueue_lock, flags);
+
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->state = SCSI_STATE_BHQUEUE;
+ SCpnt->owner = SCSI_OWNER_BH_HANDLER;
+ SCpnt->bh_next = NULL;
+
+ /*
+ * Next, put this command in the BH queue.
+ *
+ * We need a spinlock here, or compare and exchange if we can reorder incoming
+ * Scsi_Cmnds, as it happens pretty often that scsi_done is called multiple
+ * times before the bh is serviced. -jj
+ *
+ * We already have the io_request_lock here, since we are called from the
+ * interrupt handler or the error handler. (DB)
+ *
+ * This may be true at the moment, but I would like to wean all of the low
+ * level drivers away from using io_request_lock. Technically they should
+ * all use their own locking. I am adding a small spinlock to protect
+ * this datastructure to make it safe for that day. (ERY)
+ */
+ if (!scsi_bh_queue_head) {
+ scsi_bh_queue_head = SCpnt;
+ scsi_bh_queue_tail = SCpnt;
+ } else {
+ scsi_bh_queue_tail->bh_next = SCpnt;
+ scsi_bh_queue_tail = SCpnt;
+ }
+
+ spin_unlock_irqrestore(&scsi_bhqueue_lock, flags);
+ /*
+ * Mark the bottom half handler to be run.
+ */
+ mark_bh(SCSI_BH);
+}
+
+/*
+ * Procedure: scsi_bottom_half_handler
+ *
+ * Purpose: Called after we have finished processing interrupts, it
+ * performs post-interrupt handling for commands that may
+ * have completed.
+ *
+ * Notes: This is called with all interrupts enabled. This should reduce
+ * interrupt latency, stack depth, and reentrancy of the low-level
+ * drivers.
+ *
+ * The io_request_lock is required throughout this routine. There was a subtle
+ * race condition when scsi_done is called after a command has already
+ * timed out but before the time out is processed by the error handler.
+ * (DB)
+ *
+ * I believe I have corrected this. We simply monitor the return status of
+ * del_timer() - if this comes back as 0, it means that the timer has fired
+ * and that a timeout is in progress. I have modified scsi_done() such
+ * that in this instance the command is never inserted in the bottom
+ * half queue. Thus the only time we hold the lock here is when
+ * we wish to atomically remove the contents of the queue.
+ */
+void scsi_bottom_half_handler(void)
+{
+ Scsi_Cmnd *SCpnt;
+ Scsi_Cmnd *SCnext;
+ unsigned long flags;
+
+
+ while (1 == 1) {
+ spin_lock_irqsave(&scsi_bhqueue_lock, flags);
+ SCpnt = scsi_bh_queue_head;
+ scsi_bh_queue_head = NULL;
+ spin_unlock_irqrestore(&scsi_bhqueue_lock, flags);
+
+ if (SCpnt == NULL) {
+ return;
+ }
+ SCnext = SCpnt->bh_next;
+
+ for (; SCpnt; SCpnt = SCnext) {
+ SCnext = SCpnt->bh_next;
+
+ switch (scsi_decide_disposition(SCpnt)) {
+ case SUCCESS:
+ /*
+ * The command completed successfully -- pass it on up.
+ */
+ SCSI_LOG_MLCOMPLETE(3,
+ printk("Command finished %d %d 0x%x\n",
+ SCpnt->host->host_busy,
+ SCpnt->host->host_failed,
+ SCpnt->result));
+
+ scsi_finish_command(SCpnt);
+ break;
+ case NEEDS_RETRY:
+ /*
+ * We only come in here if we want to retry a command.
+ * The test to see whether the command should be
+ * retried should be keeping track of the number of
+ * tries, so we don't end up looping, of course. */
+ SCSI_LOG_MLCOMPLETE(3,
+ printk("Command needs retry %d %d 0x%x\n",
+ SCpnt->host->host_busy,
+ SCpnt->host->host_failed,
+ SCpnt->result));
+
+ scsi_retry_command(SCpnt);
+ break;
+ case ADD_TO_MLQUEUE:
+ /*
+ * This typically happens for a QUEUE_FULL message -
+ * usually when the queue depth for a given device is
+ * only approximate. Adding a command
+ * to the queue for the device will prevent further commands
+ * from being sent to the device, so we shouldn't end up
+ * with tons of things being sent down that shouldn't be.
+ */
+ SCSI_LOG_MLCOMPLETE(3, printk(
+ "Cmnd rejected as device queue full, put on ml queue %p\n",
+ SCpnt));
+ scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_DEVICE_BUSY);
+ break;
+ default:
+ /*
+ * Here we have a fatal error of some sort. Turn it over to
+ * the error handler.
+ */
+ SCSI_LOG_MLCOMPLETE(3, printk(
+ "Command failed %p %x active=%d busy=%d failed=%d\n",
+ SCpnt, SCpnt->result,
+ atomic_read(&SCpnt->host->host_active),
+ SCpnt->host->host_busy,
+ SCpnt->host->host_failed));
+
+ /*
+ * Dump the sense information too.
+ */
+ if ((status_byte(SCpnt->result) & CHECK_CONDITION) != 0) {
+ SCSI_LOG_MLCOMPLETE(3, print_sense("bh", SCpnt));
+ }
+ if (SCpnt->host->eh_wait != NULL) {
+ SCpnt->host->host_failed++;
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+ SCpnt->state = SCSI_STATE_FAILED;
+ SCpnt->host->in_recovery = 1;
+ /*
+ * If the host is having troubles, then look to
+ * see if this was the last command that might
+ * have failed. If so, wake up the error handler. */
+ if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk(
+ "Waking error handler thread (%d)\n",
+ atomic_read(&SCpnt->host->eh_wait->count)));
+#if 0
+ up(SCpnt->host->eh_wait);
+#endif
+ }
+ } else {
+ /*
+ * We only get here if the error recovery thread has died.
+ */
+ printk("scsi_bh: error finish\n");
+ scsi_finish_command(SCpnt);
+ }
+ }
+ } /* for(; SCpnt...) */
+
+ } /* while(1==1) */
+
+}
+
+/*
+ * Function: scsi_retry_command
+ *
+ * Purpose: Send a command back to the low level to be retried.
+ *
+ * Notes: This command is always executed in the context of the
+ * bottom half handler, or the error handler thread. Low
+ * level drivers should not become re-entrant as a result of
+ * this.
+ */
+int scsi_retry_command(Scsi_Cmnd * SCpnt)
+{
+ memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->request_buffer = SCpnt->buffer;
+ SCpnt->request_bufflen = SCpnt->bufflen;
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->cmd_len = SCpnt->old_cmd_len;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+
+ /*
+ * Zero the sense information from the last time we tried
+ * this command.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+
+ return scsi_dispatch_cmd(SCpnt);
+}
+
+/*
+ * Function: scsi_finish_command
+ *
+ * Purpose: Pass command off to upper layer for finishing of I/O
+ * request, waking processes that are waiting on results,
+ * etc.
+ */
+void scsi_finish_command(Scsi_Cmnd * SCpnt)
+{
+ struct Scsi_Host *host;
+ Scsi_Device *device;
+ Scsi_Request * SRpnt;
+ unsigned long flags;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ host = SCpnt->host;
+ device = SCpnt->device;
+
+ /*
+ * We need to protect the decrement, as otherwise a race condition
+ * would exist. Fiddling with SCpnt isn't a problem as the
+ * design only allows a single SCpnt to be active in one
+ * execution context at a time, but the device and host structures are
+ * shared.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ host->host_busy--; /* Indicate that we are free */
+ device->device_busy--; /* Decrement device usage counter. */
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ /*
+ * Clear the flags which say that the device/host is no longer
+ * capable of accepting new commands. These are set in scsi_queue.c
+ * for both the queue full condition on a device, and for a
+ * host full condition on the host.
+ */
+ host->host_blocked = FALSE;
+ device->device_blocked = FALSE;
+
+ /*
+ * If we have valid sense information, then some kind of recovery
+ * must have taken place. Make a note of this.
+ */
+ if (scsi_sense_valid(SCpnt)) {
+ SCpnt->result |= (DRIVER_SENSE << 24);
+ }
+ SCSI_LOG_MLCOMPLETE(3, printk(
+ "Notifying upper driver of completion for device %d %x\n",
+ SCpnt->device->id, SCpnt->result));
+
+ SCpnt->owner = SCSI_OWNER_HIGHLEVEL;
+ SCpnt->state = SCSI_STATE_FINISHED;
+
+ /* We can get here with use_sg=0, causing a panic in the
+ upper level (DB) */
+ SCpnt->use_sg = SCpnt->old_use_sg;
+
+ /*
+ * If there is an associated request structure, copy the data over
+ * before we call the completion function.
+ */
+ SRpnt = SCpnt->sc_request;
+
+ if( SRpnt != NULL ) {
+ if(!SRpnt->sr_command) {
+ printk("scsi_finish_command: SRpnt=%p, SRpnt->sr_command=%p\n",
+ SRpnt, SRpnt->sr_command);
+ printk("SRpnt->freeaddr = %p\n", SRpnt->freeaddr);
+ BUG();
+ }
+ SRpnt->sr_result = SRpnt->sr_command->result;
+ if( SRpnt->sr_result != 0 ) {
+ memcpy(SRpnt->sr_sense_buffer,
+ SRpnt->sr_command->sense_buffer,
+ sizeof(SRpnt->sr_sense_buffer));
+ }
+ }
+
+ SCpnt->done(SCpnt);
+}
+
+static int scsi_register_host(Scsi_Host_Template *);
+static int scsi_unregister_host(Scsi_Host_Template *);
+
+/*
+ * Function: scsi_release_commandblocks()
+ *
+ * Purpose: Release command blocks associated with a device.
+ *
+ * Arguments: SDpnt - device
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locking assumed or required.
+ *
+ * Notes:
+ */
+void scsi_release_commandblocks(Scsi_Device * SDpnt)
+{
+ Scsi_Cmnd *SCpnt, *SCnext;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device_request_lock, flags);
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCnext) {
+ SDpnt->device_queue = SCnext = SCpnt->next;
+ kfree((char *) SCpnt);
+ }
+ SDpnt->has_cmdblocks = 0;
+ SDpnt->queue_depth = 0;
+ spin_unlock_irqrestore(&device_request_lock, flags);
+}
+
+/*
+ * Function: scsi_build_commandblocks()
+ *
+ * Purpose: Allocate command blocks associated with a device.
+ *
+ * Arguments: SDpnt - device
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locking assumed or required.
+ *
+ * Notes:
+ */
+void scsi_build_commandblocks(Scsi_Device * SDpnt)
+{
+ unsigned long flags;
+ struct Scsi_Host *host = SDpnt->host;
+ int j;
+ Scsi_Cmnd *SCpnt;
+
+ spin_lock_irqsave(&device_request_lock, flags);
+
+ if (SDpnt->queue_depth == 0)
+ {
+ SDpnt->queue_depth = host->cmd_per_lun;
+ if (SDpnt->queue_depth == 0)
+ SDpnt->queue_depth = 1; /* live to fight another day */
+ }
+ SDpnt->device_queue = NULL;
+
+ for (j = 0; j < SDpnt->queue_depth; j++) {
+ SCpnt = (Scsi_Cmnd *)
+ kmalloc(sizeof(Scsi_Cmnd),
+ GFP_ATOMIC |
+ (host->unchecked_isa_dma ? GFP_DMA : 0));
+ if (NULL == SCpnt)
+ break; /* otherwise the memset on the next line would oops ... */
+ memset(SCpnt, 0, sizeof(Scsi_Cmnd));
+ SCpnt->host = host;
+ SCpnt->device = SDpnt;
+ SCpnt->target = SDpnt->id;
+ SCpnt->lun = SDpnt->lun;
+ SCpnt->channel = SDpnt->channel;
+ SCpnt->request.rq_status = RQ_INACTIVE;
+ SCpnt->use_sg = 0;
+ SCpnt->old_use_sg = 0;
+ SCpnt->old_cmd_len = 0;
+ SCpnt->underflow = 0;
+ SCpnt->old_underflow = 0;
+ SCpnt->transfersize = 0;
+ SCpnt->resid = 0;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->host_scribble = NULL;
+ SCpnt->next = SDpnt->device_queue;
+ SDpnt->device_queue = SCpnt;
+ SCpnt->state = SCSI_STATE_UNUSED;
+ SCpnt->owner = SCSI_OWNER_NOBODY;
+ }
+ if (j < SDpnt->queue_depth) { /* low on space (D.Gilbert 990424) */
+ printk(KERN_WARNING "scsi_build_commandblocks: want=%d, space for=%d blocks\n",
+ SDpnt->queue_depth, j);
+ SDpnt->queue_depth = j;
+ SDpnt->has_cmdblocks = (0 != j);
+ } else {
+ SDpnt->has_cmdblocks = 1;
+ }
+ spin_unlock_irqrestore(&device_request_lock, flags);
+}
+
+void __init scsi_host_no_insert(char *str, int n)
+{
+ Scsi_Host_Name *shn, *shn2;
+ int len;
+
+ len = strlen(str);
+ if (len && (shn = (Scsi_Host_Name *) kmalloc(sizeof(Scsi_Host_Name), GFP_ATOMIC))) {
+ if ((shn->name = kmalloc(len+1, GFP_ATOMIC))) {
+ strncpy(shn->name, str, len);
+ shn->name[len] = 0;
+ shn->host_no = n;
+ shn->host_registered = 0;
+ shn->loaded_as_module = 1; /* numbers shouldn't be freed in any case */
+ shn->next = NULL;
+ if (scsi_host_no_list) {
+ for (shn2 = scsi_host_no_list;shn2->next;shn2 = shn2->next)
+ ;
+ shn2->next = shn;
+ }
+ else
+ scsi_host_no_list = shn;
+ max_scsi_hosts = n+1;
+ }
+ else
+ kfree((char *) shn);
+ }
+}
+
+#ifdef CONFIG_PROC_FS
+static int scsi_proc_info(char *buffer, char **start, off_t offset, int length)
+{
+ Scsi_Device *scd;
+ struct Scsi_Host *HBA_ptr;
+ int size, len = 0;
+ off_t begin = 0;
+ off_t pos = 0;
+
+ /*
+ * First, see if there are any attached devices or not.
+ */
+ for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+ if (HBA_ptr->host_queue != NULL) {
+ break;
+ }
+ }
+ size = sprintf(buffer + len, "Attached devices: %s\n", (HBA_ptr) ? "" : "none");
+ len += size;
+ pos = begin + len;
+ for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+#if 0
+ size += sprintf(buffer + len, "scsi%2d: %s\n", (int) HBA_ptr->host_no,
+ HBA_ptr->hostt->procname);
+ len += size;
+ pos = begin + len;
+#endif
+ for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+ proc_print_scsidevice(scd, buffer, &size, len);
+ len += size;
+ pos = begin + len;
+
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
+ }
+ if (pos > offset + length)
+ goto stop_output;
+ }
+ }
+
+stop_output:
+ *start = buffer + (offset - begin); /* Start of wanted data */
+ len -= (offset - begin); /* Start slop */
+ if (len > length)
+ len = length; /* Ending slop */
+ return (len);
+}
+
+static int proc_scsi_gen_write(struct file * file, const char * buf,
+ unsigned long length, void *data)
+{
+ struct Scsi_Device_Template *SDTpnt;
+ Scsi_Device *scd;
+ struct Scsi_Host *HBA_ptr;
+ char *p;
+ int host, channel, id, lun;
+ char * buffer;
+ int err;
+
+ if (!buf || length>PAGE_SIZE)
+ return -EINVAL;
+
+ if (!(buffer = (char *) __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ if(copy_from_user(buffer, buf, length))
+ {
+ err = -EFAULT;
+ goto out;
+ }
+
+ err = -EINVAL;
+
+ if (length < PAGE_SIZE)
+ buffer[length] = '\0';
+ else if (buffer[PAGE_SIZE-1])
+ goto out;
+
+ if (length < 11 || strncmp("scsi", buffer, 4))
+ goto out;
+
+ /*
+ * Usage: echo "scsi dump #N" > /proc/scsi/scsi
+ * to dump status of all scsi commands. The number is used to specify the level
+ * of detail in the dump.
+ */
+ if (!strncmp("dump", buffer + 5, 4)) {
+ unsigned int level;
+
+ p = buffer + 10;
+
+ if (*p == '\0')
+ goto out;
+
+ level = simple_strtoul(p, NULL, 0);
+ scsi_dump_status(level);
+ }
+ /*
+ * Usage: echo "scsi log token #N" > /proc/scsi/scsi
+ * where token is one of [all,none,error,timeout,scan,mlqueue,
+ * mlcomplete,llqueue,llcomplete,hlqueue,hlcomplete,ioctl]
+ */
+#ifdef CONFIG_SCSI_LOGGING /* { */
+
+ if (!strncmp("log", buffer + 5, 3)) {
+ char *token;
+ unsigned int level;
+
+ p = buffer + 9;
+ token = p;
+ while (*p != ' ' && *p != '\t' && *p != '\0') {
+ p++;
+ }
+
+ if (*p == '\0') {
+ if (strncmp(token, "all", 3) == 0) {
+ /*
+ * Turn on absolutely everything.
+ */
+ scsi_logging_level = ~0;
+ } else if (strncmp(token, "none", 4) == 0) {
+ /*
+ * Turn off absolutely everything.
+ */
+ scsi_logging_level = 0;
+ } else {
+ goto out;
+ }
+ } else {
+ *p++ = '\0';
+
+ level = simple_strtoul(p, NULL, 0);
+
+ /*
+ * Now figure out what to do with it.
+ */
+ if (strcmp(token, "error") == 0) {
+ SCSI_SET_ERROR_RECOVERY_LOGGING(level);
+ } else if (strcmp(token, "timeout") == 0) {
+ SCSI_SET_TIMEOUT_LOGGING(level);
+ } else if (strcmp(token, "scan") == 0) {
+ SCSI_SET_SCAN_BUS_LOGGING(level);
+ } else if (strcmp(token, "mlqueue") == 0) {
+ SCSI_SET_MLQUEUE_LOGGING(level);
+ } else if (strcmp(token, "mlcomplete") == 0) {
+ SCSI_SET_MLCOMPLETE_LOGGING(level);
+ } else if (strcmp(token, "llqueue") == 0) {
+ SCSI_SET_LLQUEUE_LOGGING(level);
+ } else if (strcmp(token, "llcomplete") == 0) {
+ SCSI_SET_LLCOMPLETE_LOGGING(level);
+ } else if (strcmp(token, "hlqueue") == 0) {
+ SCSI_SET_HLQUEUE_LOGGING(level);
+ } else if (strcmp(token, "hlcomplete") == 0) {
+ SCSI_SET_HLCOMPLETE_LOGGING(level);
+ } else if (strcmp(token, "ioctl") == 0) {
+ SCSI_SET_IOCTL_LOGGING(level);
+ } else {
+ goto out;
+ }
+ }
+
+ printk(KERN_INFO "scsi logging level set to 0x%8.8x\n", scsi_logging_level);
+ }
+#endif /* CONFIG_SCSI_LOGGING */ /* } */
+
+ /*
+ * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi
+ * with "0 1 2 3" replaced by your "Host Channel Id Lun".
+ * Consider this feature BETA.
+ * CAUTION: This is not for hotplugging your peripherals. As
+ * SCSI was not designed for this, you could damage your
+ * hardware!
+ * However, it is perhaps legal to switch on an
+ * already connected device; it is just not
+ * guaranteed that this doesn't corrupt an ongoing data transfer.
+ */
+ if (!strncmp("add-single-device", buffer + 5, 17)) {
+ p = buffer + 23;
+
+ host = simple_strtoul(p, &p, 0);
+ channel = simple_strtoul(p + 1, &p, 0);
+ id = simple_strtoul(p + 1, &p, 0);
+ lun = simple_strtoul(p + 1, &p, 0);
+
+ printk(KERN_INFO "scsi singledevice %d %d %d %d\n", host, channel,
+ id, lun);
+
+ for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+ if (HBA_ptr->host_no == host) {
+ break;
+ }
+ }
+ err = -ENXIO;
+ if (!HBA_ptr)
+ goto out;
+
+ for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+ if ((scd->channel == channel
+ && scd->id == id
+ && scd->lun == lun)) {
+ break;
+ }
+ }
+
+ err = -ENOSYS;
+ if (scd)
+ goto out; /* We do not yet support unplugging */
+
+ scan_scsis(HBA_ptr, 1, channel, id, lun);
+
+ /* FIXME (DB) This assumes that the queue_depth routines can be used
+ in this context as well, while they were all designed to be
+ called only once after the detect routine. (DB) */
+ /* queue_depth routine moved to inside scan_scsis(,1,,,) so
+ it is called before build_commandblocks() */
+
+ err = length;
+ goto out;
+ }
+ /*
+ * Usage: echo "scsi remove-single-device 0 1 2 3" >/proc/scsi/scsi
+ * with "0 1 2 3" replaced by your "Host Channel Id Lun".
+ *
+ * Consider this feature pre-BETA.
+ *
+ * CAUTION: This is not for hotplugging your peripherals. As
+ * SCSI was not designed for this you could damage your
+ * hardware and thoroughly confuse the SCSI subsystem.
+ *
+ */
+ else if (!strncmp("remove-single-device", buffer + 5, 20)) {
+ p = buffer + 26;
+
+ host = simple_strtoul(p, &p, 0);
+ channel = simple_strtoul(p + 1, &p, 0);
+ id = simple_strtoul(p + 1, &p, 0);
+ lun = simple_strtoul(p + 1, &p, 0);
+
+ for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+ if (HBA_ptr->host_no == host) {
+ break;
+ }
+ }
+ err = -ENODEV;
+ if (!HBA_ptr)
+ goto out;
+
+ for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+ if ((scd->channel == channel
+ && scd->id == id
+ && scd->lun == lun)) {
+ break;
+ }
+ }
+
+ if (scd == NULL)
+ goto out; /* there is no such device attached */
+
+ err = -EBUSY;
+ if (scd->access_count)
+ goto out;
+
+ SDTpnt = scsi_devicelist;
+ while (SDTpnt != NULL) {
+ if (SDTpnt->detach)
+ (*SDTpnt->detach) (scd);
+ SDTpnt = SDTpnt->next;
+ }
+
+ if (scd->attached == 0) {
+ /*
+ * Nobody is using this device any more.
+ * Free all of the command structures.
+ */
+ if (HBA_ptr->hostt->revoke)
+ HBA_ptr->hostt->revoke(scd);
+#ifdef DEVFS_MUST_DIE
+ devfs_unregister (scd->de);
+#endif
+ scsi_release_commandblocks(scd);
+
+ /* Now we can remove the device structure */
+ if (scd->next != NULL)
+ scd->next->prev = scd->prev;
+
+ if (scd->prev != NULL)
+ scd->prev->next = scd->next;
+
+ if (HBA_ptr->host_queue == scd) {
+ HBA_ptr->host_queue = scd->next;
+ }
+ blk_cleanup_queue(&scd->request_queue);
+ kfree((char *) scd);
+ } else {
+ goto out;
+ }
+ err = 0;
+ }
+out:
+
+ free_page((unsigned long) buffer);
+ return err;
+}
+#endif
+
+/*
+ * This entry point should be called by a driver if it is trying
+ * to add a low level scsi driver to the system.
+ */
+static int scsi_register_host(Scsi_Host_Template * tpnt)
+{
+ int pcount;
+ struct Scsi_Host *shpnt;
+ Scsi_Device *SDpnt;
+ struct Scsi_Device_Template *sdtpnt;
+ const char *name;
+ unsigned long flags;
+ int out_of_space = 0;
+
+ if (tpnt->next || !tpnt->detect)
+ return 1; /* Must be already loaded, or
+ * no detect routine available
+ */
+
+ /* If max_sectors isn't set, default to max */
+ if (!tpnt->max_sectors)
+ tpnt->max_sectors = MAX_SECTORS;
+
+ pcount = next_scsi_host;
+
+ MOD_INC_USE_COUNT;
+
+ /* The detect routine must carefully spinunlock/spinlock if
+ it enables interrupts, since all interrupt handlers do
+ spinlock as well.
+ All lame drivers are going to fail due to the following
+ spinlock. For the time being, let's use it only for drivers
+ using the new scsi code. NOTE: the detect routine could
+ redefine the value tpnt->use_new_eh_code. (DB, 13 May 1998) */
+
+ if (tpnt->use_new_eh_code) {
+ spin_lock_irqsave(&io_request_lock, flags);
+ tpnt->present = tpnt->detect(tpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ } else
+ tpnt->present = tpnt->detect(tpnt);
+
+ if (tpnt->present) {
+ if (pcount == next_scsi_host) {
+ if (tpnt->present > 1) {
+ printk(KERN_ERR "scsi: Failure to register low-level "
+ "scsi driver");
+ scsi_unregister_host(tpnt);
+ return 1;
+ }
+ /*
+ * The low-level driver's detect routine did not register
+ * a host itself, so do it on its behalf now.
+ */
+ if(scsi_register(tpnt, 0)==NULL)
+ {
+ printk(KERN_ERR "scsi: register failed.\n");
+ scsi_unregister_host(tpnt);
+ return 1;
+ }
+ }
+ tpnt->next = scsi_hosts; /* Add to the linked list */
+ scsi_hosts = tpnt;
+
+ /* Add the new driver to /proc/scsi */
+#ifdef CONFIG_PROC_FS
+ build_proc_dir_entries(tpnt);
+#endif
+
+#if 0
+ /*
+ * Add the kernel threads for each host adapter that will
+ * handle error correction.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt == tpnt && shpnt->hostt->use_new_eh_code) {
+ DECLARE_MUTEX_LOCKED(sem);
+
+ shpnt->eh_notify = &sem;
+ kernel_thread((int (*)(void *)) scsi_error_handler,
+ (void *) shpnt, 0);
+
+ /*
+ * Now wait for the kernel error thread to initialize itself
+ * as it might be needed when we scan the bus.
+ */
+ down(&sem);
+ shpnt->eh_notify = NULL;
+ }
+ }
+#endif
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt == tpnt) {
+ if (tpnt->info) {
+ name = tpnt->info(shpnt);
+ } else {
+ name = tpnt->name;
+ }
+ printk(KERN_INFO "scsi%d : %s\n", /* And print a little message */
+ shpnt->host_no, name);
+ }
+ }
+
+ /* The next step is to call scan_scsis here. This generates the
+ * Scsi_Device entries.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt == tpnt) {
+ scan_scsis(shpnt, 0, 0, 0, 0);
+ if (shpnt->select_queue_depths != NULL) {
+ (shpnt->select_queue_depths) (shpnt, shpnt->host_queue);
+ }
+ }
+ }
+
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+ if (sdtpnt->init && sdtpnt->dev_noticed)
+ (*sdtpnt->init) ();
+ }
+
+ /*
+ * Next we create the Scsi_Cmnd structures for this host
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next)
+ if (SDpnt->host->hostt == tpnt) {
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next)
+ if (sdtpnt->attach)
+ (*sdtpnt->attach) (SDpnt);
+ if (SDpnt->attached) {
+ scsi_build_commandblocks(SDpnt);
+ if (0 == SDpnt->has_cmdblocks)
+ out_of_space = 1;
+ }
+ }
+ }
+
+ /*
+ * Now that we have all of the devices, resize the DMA pool,
+ * as required. */
+ if (!out_of_space)
+ scsi_resize_dma_pool();
+
+ /* This does any final handling that is required. */
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+ if (sdtpnt->finish && sdtpnt->nr_dev) {
+ (*sdtpnt->finish) ();
+ }
+ }
+ }
+#if defined(USE_STATIC_SCSI_MEMORY)
+ printk("SCSI memory: total %ldKb, used %ldKb, free %ldKb.\n",
+ (scsi_memory_upper_value - scsi_memory_lower_value) / 1024,
+ (scsi_init_memory_start - scsi_memory_lower_value) / 1024,
+ (scsi_memory_upper_value - scsi_init_memory_start) / 1024);
+#endif
+
+ if (out_of_space) {
+ scsi_unregister_host(tpnt); /* easiest way to clean up?? */
+ return 1;
+ } else
+ return 0;
+}
+
+/*
+ * Similarly, this entry point should be called by a loadable module if it
+ * is trying to remove a low level scsi driver from the system.
+ */
+static int scsi_unregister_host(Scsi_Host_Template * tpnt)
+{
+ int online_status;
+ int pcount0, pcount;
+ Scsi_Cmnd *SCpnt;
+ Scsi_Device *SDpnt;
+ Scsi_Device *SDpnt1;
+ struct Scsi_Device_Template *sdtpnt;
+ struct Scsi_Host *sh1;
+ struct Scsi_Host *shpnt;
+ char name[10]; /* host_no>=10^9? I don't think so. */
+
+#if 0
+ /* get the big kernel lock, so we don't race with open() */
+ lock_kernel();
+#endif
+
+ /*
+ * First verify that this host adapter is completely free with no pending
+ * commands
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ if (SDpnt->host->hostt == tpnt
+ && SDpnt->host->hostt->module
+ && GET_USE_COUNT(SDpnt->host->hostt->module))
+ goto err_out;
+ /*
+ * FIXME(eric) - We need to find a way to notify the
+ * low level driver that we are shutting down - via the
+ * special device entry that still needs to get added.
+ *
+ * Is detach interface below good enough for this?
+ */
+ }
+ }
+
+ /*
+ * FIXME(eric) put a spinlock on this. We force all of the devices offline
+ * to help prevent race conditions where other hosts/processors could try and
+ * get in and queue a command.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ if (SDpnt->host->hostt == tpnt)
+ SDpnt->online = FALSE;
+
+ }
+ }
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt != tpnt) {
+ continue;
+ }
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ /*
+ * Loop over all of the commands associated with the device. If any of
+ * them are busy, then set the state back to inactive and bail.
+ */
+ for (SCpnt = SDpnt->device_queue; SCpnt;
+ SCpnt = SCpnt->next) {
+ online_status = SDpnt->online;
+ SDpnt->online = FALSE;
+ if (SCpnt->request.rq_status != RQ_INACTIVE) {
+ printk(KERN_ERR "SCSI device not inactive - rq_status=%d, target=%d, pid=%ld, state=%d, owner=%d.\n",
+ SCpnt->request.rq_status, SCpnt->target, SCpnt->pid,
+ SCpnt->state, SCpnt->owner);
+ for (SDpnt1 = shpnt->host_queue; SDpnt1;
+ SDpnt1 = SDpnt1->next) {
+ for (SCpnt = SDpnt1->device_queue; SCpnt;
+ SCpnt = SCpnt->next)
+ if (SCpnt->request.rq_status == RQ_SCSI_DISCONNECTING)
+ SCpnt->request.rq_status = RQ_INACTIVE;
+ }
+ SDpnt->online = online_status;
+ printk(KERN_ERR "Device busy???\n");
+ goto err_out;
+ }
+ /*
+ * No, this device is really free. Mark it as such, and
+ * continue on.
+ */
+ SCpnt->state = SCSI_STATE_DISCONNECTING;
+ SCpnt->request.rq_status = RQ_SCSI_DISCONNECTING; /* Mark as busy */
+ }
+ }
+ }
+ /* Next we detach the high level drivers from the Scsi_Device structures */
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt != tpnt) {
+ continue;
+ }
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next)
+ if (sdtpnt->detach)
+ (*sdtpnt->detach) (SDpnt);
+
+ /* If something still attached, punt */
+ if (SDpnt->attached) {
+ printk(KERN_ERR "Attached usage count = %d\n", SDpnt->attached);
+ goto err_out;
+ }
+#ifdef DEVFS_MUST_DIE
+ devfs_unregister (SDpnt->de);
+#endif
+ }
+ }
+
+#if 0
+ /*
+ * Next, kill the kernel error recovery thread for this host.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt == tpnt
+ && shpnt->hostt->use_new_eh_code
+ && shpnt->ehandler != NULL) {
+ DECLARE_MUTEX_LOCKED(sem);
+
+ shpnt->eh_notify = &sem;
+ send_sig(SIGHUP, shpnt->ehandler, 1);
+ down(&sem);
+ shpnt->eh_notify = NULL;
+ }
+ }
+#endif
+
+ /* Next we free up the Scsi_Cmnd structures for this host */
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt != tpnt) {
+ continue;
+ }
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = shpnt->host_queue) {
+ scsi_release_commandblocks(SDpnt);
+
+ blk_cleanup_queue(&SDpnt->request_queue);
+ /* Next free up the Scsi_Device structures for this host */
+ shpnt->host_queue = SDpnt->next;
+ kfree((char *) SDpnt);
+
+ }
+ }
+
+ /* Next we go through and remove the instances of the individual hosts
+ * that were detected */
+
+ pcount0 = next_scsi_host;
+ for (shpnt = scsi_hostlist; shpnt; shpnt = sh1) {
+ sh1 = shpnt->next;
+ if (shpnt->hostt != tpnt)
+ continue;
+ pcount = next_scsi_host;
+ /* Remove the /proc/scsi directory entry */
+ sprintf(name,"%d",shpnt->host_no);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry(name, tpnt->proc_dir);
+#endif
+ if (tpnt->release)
+ (*tpnt->release) (shpnt);
+ else {
+ /* This is the default case for the release function.
+ * It should do the right thing for most correctly
+ * written host adapters.
+ */
+ if (shpnt->irq)
+ free_irq(shpnt->irq, NULL);
+
+#if 0
+ if (shpnt->dma_channel != 0xff)
+ free_dma(shpnt->dma_channel);
+#endif
+ if (shpnt->io_port && shpnt->n_io_port)
+ release_region(shpnt->io_port, shpnt->n_io_port);
+ }
+ if (pcount == next_scsi_host)
+ scsi_unregister(shpnt);
+ tpnt->present--;
+ }
+
+ /*
+ * If there are absolutely no more hosts left, it is safe
+ * to completely nuke the DMA pool. The resize operation will
+ * do the right thing and free everything.
+ */
+ if (!scsi_hosts)
+ scsi_resize_dma_pool();
+
+ if (pcount0 != next_scsi_host)
+ printk(KERN_INFO "scsi : %d host%s left.\n", next_scsi_host,
+ (next_scsi_host == 1) ? "" : "s");
+
+#if defined(USE_STATIC_SCSI_MEMORY)
+ printk("SCSI memory: total %ldKb, used %ldKb, free %ldKb.\n",
+ (scsi_memory_upper_value - scsi_memory_lower_value) / 1024,
+ (scsi_init_memory_start - scsi_memory_lower_value) / 1024,
+ (scsi_memory_upper_value - scsi_init_memory_start) / 1024);
+#endif
+
+ /*
+ * Remove it from the linked list and /proc if all
+ * hosts were successfully removed (i.e. present == 0)
+ */
+ if (!tpnt->present) {
+ Scsi_Host_Template **SHTp = &scsi_hosts;
+ Scsi_Host_Template *SHT;
+
+ while ((SHT = *SHTp) != NULL) {
+ if (SHT == tpnt) {
+ *SHTp = SHT->next;
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry(tpnt->proc_name, proc_scsi);
+#endif
+ break;
+ }
+ SHTp = &SHT->next;
+ }
+ }
+ MOD_DEC_USE_COUNT;
+
+#if 0
+ unlock_kernel();
+#endif
+ return 0;
+
+err_out:
+
+#if 0
+ unlock_kernel();
+#endif
+ return -1;
+}
+
+static int scsi_unregister_device(struct Scsi_Device_Template *tpnt);
+
+/*
+ * This entry point should be called by a loadable module if it is trying
+ * to add a high level scsi driver to the system.
+ */
+static int scsi_register_device_module(struct Scsi_Device_Template *tpnt)
+{
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *shpnt;
+ int out_of_space = 0;
+
+ if (tpnt->next)
+ return 1;
+
+ scsi_register_device(tpnt);
+ /*
+ * First scan the devices that we know about, and see if we notice them.
+ */
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ if (tpnt->detect)
+ SDpnt->detected = (*tpnt->detect) (SDpnt);
+ }
+ }
+
+ /*
+ * If any of the devices would match this driver, then perform the
+ * init function.
+ */
+ if (tpnt->init && tpnt->dev_noticed) {
+ if ((*tpnt->init) ()) {
+ for (shpnt = scsi_hostlist; shpnt;
+ shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ SDpnt->detected = 0;
+ }
+ }
+ scsi_deregister_device(tpnt);
+ return 1;
+ }
+ }
+
+ /*
+ * Now actually connect the devices to the new driver.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ SDpnt->attached += SDpnt->detected;
+ SDpnt->detected = 0;
+ if (tpnt->attach)
+ (*tpnt->attach) (SDpnt);
+ /*
+ * If this driver attached to the device and we don't have any
+ * command blocks for this device, allocate some.
+ */
+ if (SDpnt->attached && SDpnt->has_cmdblocks == 0) {
+ SDpnt->online = TRUE;
+ scsi_build_commandblocks(SDpnt);
+ if (0 == SDpnt->has_cmdblocks)
+ out_of_space = 1;
+ }
+ }
+ }
+
+ /*
+ * This does any final handling that is required.
+ */
+ if (tpnt->finish && tpnt->nr_dev)
+ (*tpnt->finish) ();
+ if (!out_of_space)
+ scsi_resize_dma_pool();
+ MOD_INC_USE_COUNT;
+
+ if (out_of_space) {
+ scsi_unregister_device(tpnt); /* easiest way to clean up?? */
+ return 1;
+ } else
+ return 0;
+}
+
+static int scsi_unregister_device(struct Scsi_Device_Template *tpnt)
+{
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *shpnt;
+
+#if 0
+ lock_kernel();
+#endif
+ /*
+ * If we are busy, this is not going to fly.
+ */
+ if (GET_USE_COUNT(tpnt->module) != 0)
+ goto error_out;
+
+ /*
+ * Next, detach the devices from the driver.
+ */
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ if (tpnt->detach)
+ (*tpnt->detach) (SDpnt);
+ if (SDpnt->attached == 0) {
+ SDpnt->online = FALSE;
+
+ /*
+ * Nobody is using this device any more. Free all of the
+ * command structures.
+ */
+ scsi_release_commandblocks(SDpnt);
+ }
+ }
+ }
+ /*
+ * Extract the template from the linked list.
+ */
+ scsi_deregister_device(tpnt);
+
+ MOD_DEC_USE_COUNT;
+#if 0
+ unlock_kernel();
+#endif
+
+ /*
+ * Final cleanup for the driver is done in the driver sources in the
+ * cleanup function.
+ */
+ return 0;
+error_out:
+#if 0
+ unlock_kernel();
+#endif
+ return -1;
+}
+
+
+/* This function should be called by drivers which need to register
+ * with the midlevel scsi system. As of 2.4.0-test9pre3 this is our
+ * main device/host registration function. /mathiasen
+ */
+int scsi_register_module(int module_type, void *ptr)
+{
+ switch (module_type) {
+ case MODULE_SCSI_HA:
+ return scsi_register_host((Scsi_Host_Template *) ptr);
+
+ /* Load upper level device handler of some kind */
+ case MODULE_SCSI_DEV:
+#ifdef CONFIG_KMOD
+ if (scsi_hosts == NULL)
+ request_module("scsi_hostadapter");
+#endif
+ return scsi_register_device_module((struct Scsi_Device_Template *) ptr);
+ /* The rest of these are not yet implemented */
+
+ /* Load constants.o */
+ case MODULE_SCSI_CONST:
+
+ /* Load specialized ioctl handler for some device. Intended for
+ * cdroms that have non-SCSI2 audio command sets. */
+ case MODULE_SCSI_IOCTL:
+
+ default:
+ return 1;
+ }
+}
+
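+/*
+ * Usage sketch: in stock 2.4 a low-level host driver registers from its
+ * module init (usually via the scsi_module.c wrapper). The mydriver_*
+ * and MYDRIVER_TEMPLATE names below are hypothetical.
+ */
+#if 0
+static Scsi_Host_Template driver_template = MYDRIVER_TEMPLATE;
+
+static int __init mydriver_init(void)
+{
+ return scsi_register_module(MODULE_SCSI_HA, &driver_template)
+ ? -ENODEV : 0;
+}
+#endif
+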
+/* Reverse the actions taken above
+ */
+int scsi_unregister_module(int module_type, void *ptr)
+{
+ int retval = 0;
+
+ switch (module_type) {
+ case MODULE_SCSI_HA:
+ retval = scsi_unregister_host((Scsi_Host_Template *) ptr);
+ break;
+ case MODULE_SCSI_DEV:
+ retval = scsi_unregister_device((struct Scsi_Device_Template *)ptr);
+ break;
+ /* The rest of these are not yet implemented. */
+ case MODULE_SCSI_CONST:
+ case MODULE_SCSI_IOCTL:
+ break;
+ default:;
+ }
+ return retval;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Function: scsi_dump_status
+ *
+ * Purpose: Brain dump of scsi system, used for problem solving.
+ *
+ * Arguments: level - used to indicate level of detail.
+ *
+ * Notes: The level isn't used at all yet, but we need to find some way
+ * of sensibly logging varying degrees of information. A quick one-line
+ * display of each command, plus the status would be most useful.
+ *
+ * This does depend upon CONFIG_SCSI_LOGGING - I do want some way of turning
+ * it all off if the user wants a lean and mean kernel. It would probably
+ * also be useful to allow the user to specify one single host to be dumped.
+ * A second argument to the function would be useful for that purpose.
+ *
+ * FIXME - some formatting of the output into tables would be very handy.
+ */
+static void scsi_dump_status(int level)
+{
+#ifdef CONFIG_SCSI_LOGGING /* { */
+ int i;
+ struct Scsi_Host *shpnt;
+ Scsi_Cmnd *SCpnt;
+ Scsi_Device *SDpnt;
+ printk(KERN_INFO "Dump of scsi host parameters:\n");
+ i = 0;
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ printk(KERN_INFO " %d %d %d : %d %d\n",
+ shpnt->host_failed,
+ shpnt->host_busy,
+ atomic_read(&shpnt->host_active),
+ shpnt->host_blocked,
+ shpnt->host_self_blocked);
+ }
+
+ printk(KERN_INFO "\n\n");
+ printk(KERN_INFO "Dump of scsi command parameters:\n");
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ printk(KERN_INFO "h:c:t:l (dev sect nsect cnumsec sg) (ret all flg) (to/cmd to ito) cmd snse result\n");
+ for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ /* (0) h:c:t:l (dev sect nsect cnumsec sg) (ret all flg) (to/cmd to ito) cmd snse result %d %x */
+ printk(KERN_INFO "(%3d) %2d:%1d:%2d:%2d (%6s %4ld %4ld %4ld %4x %1d) (%1d %1d 0x%2x) (%4d %4d %4d) 0x%2.2x 0x%2.2x 0x%8.8x\n",
+ i++,
+
+ SCpnt->host->host_no,
+ SCpnt->channel,
+ SCpnt->target,
+ SCpnt->lun,
+
+ kdevname(SCpnt->request.rq_dev),
+ SCpnt->request.sector,
+ SCpnt->request.nr_sectors,
+ SCpnt->request.current_nr_sectors,
+ SCpnt->request.rq_status,
+ SCpnt->use_sg,
+
+ SCpnt->retries,
+ SCpnt->allowed,
+ SCpnt->flags,
+
+ SCpnt->timeout_per_command,
+ SCpnt->timeout,
+ SCpnt->internal_timeout,
+
+ SCpnt->cmnd[0],
+ SCpnt->sense_buffer[2],
+ SCpnt->result);
+ }
+ }
+ }
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ /* Now dump the request lists for each block device */
+ printk(KERN_INFO "Dump of pending block device requests\n");
+ for (i = 0; i < MAX_BLKDEV; i++) {
+ struct list_head * queue_head;
+
+ queue_head = &blk_dev[i].request_queue.queue_head;
+ if (!list_empty(queue_head)) {
+ struct request *req;
+ struct list_head * entry;
+
+ printk(KERN_INFO "%d: ", i);
+ entry = queue_head->next;
+ do {
+ req = blkdev_entry_to_request(entry);
+ printk("(%s %d %ld %ld %ld) ",
+ kdevname(req->rq_dev),
+ req->cmd,
+ req->sector,
+ req->nr_sectors,
+ req->current_nr_sectors);
+ } while ((entry = entry->next) != queue_head);
+ printk("\n");
+ }
+ }
+ }
+ }
+#endif /* CONFIG_SCSI_LOGGING */ /* } */
+}
+#endif /* CONFIG_PROC_FS */
+
+static int __init scsi_host_no_init (char *str)
+{
+ static int next_no = 0;
+ char *temp;
+
+ while (str) {
+ temp = str;
+ while (*temp && (*temp != ':') && (*temp != ','))
+ temp++;
+ if (!*temp)
+ temp = NULL;
+ else
+ *temp++ = 0;
+ scsi_host_no_insert(str, next_no);
+ str = temp;
+ next_no++;
+ }
+ return 1;
+}
+
+static char *scsihosts;
+
+MODULE_PARM(scsihosts, "s");
+MODULE_DESCRIPTION("SCSI core");
+MODULE_LICENSE("GPL");
+
+#ifndef MODULE
+int __init scsi_setup(char *str)
+{
+ scsihosts = str;
+ return 1;
+}
+
+__setup("scsihosts=", scsi_setup);
+#endif
+
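+/*
+ * Example: booting with "scsihosts=aha1542::ncr53c8xx" reserves host
+ * number 0 for aha1542 and host number 2 for ncr53c8xx, leaving host
+ * number 1 unused (the empty field between the colons). The driver
+ * names here are illustrative only.
+ */
+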
+static spinlock_t slock2 = SPIN_LOCK_UNLOCKED;
+
+static int __init init_scsi(void)
+{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *generic;
+#endif
+
+ printk(KERN_INFO "SCSI subsystem driver " REVISION "\n");
+
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&slock2, flags);
+ spin_unlock_irqrestore(&slock2, flags);
+ printk("SCSI start of day -- flags = %lx\n", flags);
+ }
+
+ if( scsi_init_minimal_dma_pool() != 0 )
+ {
+ return 1;
+ }
+
+#ifdef CONFIG_PROC_FS
+ /*
+ * This makes /proc/scsi and /proc/scsi/scsi visible.
+ */
+ proc_scsi = proc_mkdir("scsi", 0);
+ if (!proc_scsi) {
+ printk (KERN_ERR "cannot init /proc/scsi\n");
+ return -ENOMEM;
+ }
+ generic = create_proc_info_entry ("scsi/scsi", 0, 0, scsi_proc_info);
+ if (!generic) {
+ printk (KERN_ERR "cannot init /proc/scsi/scsi\n");
+ remove_proc_entry("scsi", 0);
+ return -ENOMEM;
+ }
+ generic->write_proc = proc_scsi_gen_write;
+#endif
+
+#ifdef DEVFS_MUST_DIE
+ scsi_devfs_handle = devfs_mk_dir (NULL, "scsi", NULL);
+#endif
+ if (scsihosts)
+ printk(KERN_INFO "scsi: host order: %s\n", scsihosts);
+ scsi_host_no_init (scsihosts);
+ /*
+ * This is where the processing takes place for most everything
+ * when commands are completed.
+ */
+ init_bh(SCSI_BH, scsi_bottom_half_handler);
+
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&slock2, flags);
+ spin_unlock_irqrestore(&slock2, flags);
+ printk("SCSI end of day -- flags = %lx\n", flags);
+ }
+
+ return 0;
+}
+
+static void __exit exit_scsi(void)
+{
+ Scsi_Host_Name *shn, *shn2 = NULL;
+
+ remove_bh(SCSI_BH);
+
+#ifdef DEVFS_MUST_DIE
+ devfs_unregister (scsi_devfs_handle);
+#endif
+ for (shn = scsi_host_no_list;shn;shn = shn->next) {
+ if (shn->name)
+ kfree(shn->name);
+ if (shn2)
+ kfree (shn2);
+ shn2 = shn;
+ }
+ if (shn2)
+ kfree (shn2);
+
+#ifdef CONFIG_PROC_FS
+ /* No, we're not here anymore. Don't show the /proc/scsi files. */
+ remove_proc_entry ("scsi/scsi", 0);
+ remove_proc_entry ("scsi", 0);
+#endif
+
+ /*
+ * Free up the DMA pool.
+ */
+ scsi_resize_dma_pool();
+
+}
+
+module_init(init_scsi);
+module_exit(exit_scsi);
+
+/*
+ * Function: scsi_get_host_dev()
+ *
+ * Purpose: Create a Scsi_Device that points to the host adapter itself.
+ *
+ * Arguments: SHpnt - Host that needs a Scsi_Device
+ *
+ * Lock status: None assumed.
+ *
+ * Returns: The Scsi_Device or NULL
+ *
+ * Notes:
+ */
+Scsi_Device * scsi_get_host_dev(struct Scsi_Host * SHpnt)
+{
+ Scsi_Device * SDpnt;
+
+ /*
+ * Attach a single Scsi_Device to the Scsi_Host - this should
+ * be made to look like a "pseudo-device" that points to the
+ * HA itself. For the moment, we include it at the head of
+ * the host_queue itself - I don't think we want to show this
+ * to the HA in select_queue_depths(), as this would probably confuse
+ * matters.
+ * Note - this device is not accessible from any high-level
+ * drivers (including generics), which is probably not
+ * optimal. We can add hooks later to attach one if needed.
+ */
+ SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device),
+ GFP_ATOMIC);
+ if(SDpnt == NULL)
+ return NULL;
+
+ memset(SDpnt, 0, sizeof(Scsi_Device));
+
+ SDpnt->host = SHpnt;
+ SDpnt->id = SHpnt->this_id;
+ SDpnt->type = -1;
+ SDpnt->queue_depth = 1;
+
+ scsi_build_commandblocks(SDpnt);
+
+ scsi_initialize_queue(SDpnt, SHpnt);
+
+ SDpnt->online = TRUE;
+
+#if 0
+ /*
+ * Initialize the object that we will use to wait for command blocks.
+ */
+ init_waitqueue_head(&SDpnt->scpnt_wait);
+#endif
+ return SDpnt;
+}
+
+/*
+ * Function: scsi_free_host_dev()
+ *
+ * Purpose: Free the Scsi_Device that points to the host adapter itself.
+ *
+ * Arguments: SDpnt - The pseudo-device to free
+ *
+ * Lock status: None assumed.
+ *
+ * Returns: Nothing
+ *
+ * Notes:
+ */
+void scsi_free_host_dev(Scsi_Device * SDpnt)
+{
+ if( (unsigned char) SDpnt->id != (unsigned char) SDpnt->host->this_id )
+ {
+ panic("Attempt to delete wrong device\n");
+ }
+
+ blk_cleanup_queue(&SDpnt->request_queue);
+
+ /*
+ * We only have a single SCpnt attached to this device. Free
+ * it now.
+ */
+ scsi_release_commandblocks(SDpnt);
+ kfree(SDpnt);
+}
+
+/*
+ * Function: scsi_reset_provider_done_command
+ *
+ * Purpose: Dummy done routine.
+ *
+ * Notes: Some low level drivers will call scsi_done and end up here,
+ * others won't bother.
+ * We don't want the bogus command used for the bus/device
+ * reset to find its way into the mid-layer so we intercept
+ * it here.
+ */
+static void
+scsi_reset_provider_done_command(Scsi_Cmnd *SCpnt)
+{
+}
+
+/*
+ * Function: scsi_reset_provider
+ *
+ * Purpose: Send requested reset to a bus or device at any phase.
+ *
+ * Arguments: device - device to send reset to
+ * flag - reset type (see scsi.h)
+ *
+ * Returns: SUCCESS/FAILURE.
+ *
+ * Notes: This is used by the SCSI Generic driver to provide
+ * Bus/Device reset capability.
+ */
+int
+scsi_reset_provider(Scsi_Device *dev, int flag)
+{
+ Scsi_Cmnd SC, *SCpnt = &SC;
+ int rtn;
+
+ memset(&SCpnt->eh_timeout, 0, sizeof(SCpnt->eh_timeout));
+ SCpnt->host = dev->host;
+ SCpnt->device = dev;
+ SCpnt->target = dev->id;
+ SCpnt->lun = dev->lun;
+ SCpnt->channel = dev->channel;
+ SCpnt->request.rq_status = RQ_SCSI_BUSY;
+ SCpnt->request.waiting = NULL;
+ SCpnt->use_sg = 0;
+ SCpnt->old_use_sg = 0;
+ SCpnt->old_cmd_len = 0;
+ SCpnt->underflow = 0;
+ SCpnt->transfersize = 0;
+ SCpnt->resid = 0;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->host_scribble = NULL;
+ SCpnt->next = NULL;
+ SCpnt->state = SCSI_STATE_INITIALIZING;
+ SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+
+ memset(&SCpnt->cmnd, '\0', sizeof(SCpnt->cmnd));
+
+ SCpnt->scsi_done = scsi_reset_provider_done_command;
+ SCpnt->done = NULL;
+ SCpnt->reset_chain = NULL;
+
+ SCpnt->buffer = NULL;
+ SCpnt->bufflen = 0;
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+
+ SCpnt->internal_timeout = NORMAL_TIMEOUT;
+ SCpnt->abort_reason = DID_ABORT;
+
+ SCpnt->cmd_len = 0;
+
+ SCpnt->sc_data_direction = SCSI_DATA_UNKNOWN;
+ SCpnt->sc_request = NULL;
+ SCpnt->sc_magic = SCSI_CMND_MAGIC;
+
+ /*
+ * Sometimes the command can get back into the timer chain,
+ * so use the pid as an identifier.
+ */
+ SCpnt->pid = 0;
+
+ if (dev->host->hostt->use_new_eh_code) {
+ rtn = scsi_new_reset(SCpnt, flag);
+ } else {
+ /* The old error-handling path is compiled out in this port;
+ * fail the request rather than return an uninitialized rtn. */
+ rtn = FAILED;
+#if 0
+ unsigned long flags;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = scsi_old_reset(SCpnt, flag);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+#endif
+ }
+
+ scsi_delete_timer(SCpnt);
+ return rtn;
+}
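+
+/*
+ * Usage sketch (illustrative): a pass-through driver such as sg asks for
+ * a device-level reset with one of the SCSI_TRY_RESET_* flags defined in
+ * scsi.h, e.g.
+ *
+ * if (scsi_reset_provider(SDpnt, SCSI_TRY_RESET_DEVICE) != SUCCESS)
+ * printk("scsi: device reset failed\n");
+ */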
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi.h b/xen/drivers/scsi/scsi.h
new file mode 100644
index 0000000000..338bca8f7b
--- /dev/null
+++ b/xen/drivers/scsi/scsi.h
@@ -0,0 +1,896 @@
+/*
+ * scsi.h Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995, 1998, 1999 Eric Youngdale
+ * generic SCSI package header file by
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Modified by Eric Youngdale eric@andante.org to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ */
+
+#ifndef _SCSI_H
+#define _SCSI_H
+
+#include <xeno/config.h> /* for CONFIG_SCSI_LOGGING */
+/*#include <xeno/devfs_fs_kernel.h>*/
+/*#include <xeno/proc_fs.h>*/
+
+/*
+ * Some of the public constants are being moved to this file.
+ * We include it here so that what came from where is transparent.
+ */
+#include <scsi/scsi.h>
+
+/*#include <xeno/random.h>*/
+
+#include <asm/hardirq.h>
+#include <asm/scatterlist.h>
+#include <asm/io.h>
+
+/*
+ * These are the values that the SCpnt->sc_data_direction and
+ * SRpnt->sr_data_direction can take. They should be set by the
+ * command creator; SCSI_DATA_UNKNOWN is essentially the default,
+ * and is what you will see if nobody bothered to set a value.
+ */
+#define SCSI_DATA_UNKNOWN 0
+#define SCSI_DATA_WRITE 1
+#define SCSI_DATA_READ 2
+#define SCSI_DATA_NONE 3
+
+#ifdef CONFIG_PCI
+#include <xeno/pci.h>
+#if ((SCSI_DATA_UNKNOWN == PCI_DMA_BIDIRECTIONAL) && (SCSI_DATA_WRITE == PCI_DMA_TODEVICE) && (SCSI_DATA_READ == PCI_DMA_FROMDEVICE) && (SCSI_DATA_NONE == PCI_DMA_NONE))
+#define scsi_to_pci_dma_dir(scsi_dir) ((int)(scsi_dir))
+#else
+extern __inline__ int scsi_to_pci_dma_dir(unsigned char scsi_dir)
+{
+ if (scsi_dir == SCSI_DATA_UNKNOWN)
+ return PCI_DMA_BIDIRECTIONAL;
+ if (scsi_dir == SCSI_DATA_WRITE)
+ return PCI_DMA_TODEVICE;
+ if (scsi_dir == SCSI_DATA_READ)
+ return PCI_DMA_FROMDEVICE;
+ return PCI_DMA_NONE;
+}
+#endif
+#endif
+
+#if defined(CONFIG_SBUS) && !defined(CONFIG_SUN3) && !defined(CONFIG_SUN3X)
+#include <asm/sbus.h>
+#if ((SCSI_DATA_UNKNOWN == SBUS_DMA_BIDIRECTIONAL) && (SCSI_DATA_WRITE == SBUS_DMA_TODEVICE) && (SCSI_DATA_READ == SBUS_DMA_FROMDEVICE) && (SCSI_DATA_NONE == SBUS_DMA_NONE))
+#define scsi_to_sbus_dma_dir(scsi_dir) ((int)(scsi_dir))
+#else
+extern __inline__ int scsi_to_sbus_dma_dir(unsigned char scsi_dir)
+{
+ if (scsi_dir == SCSI_DATA_UNKNOWN)
+ return SBUS_DMA_BIDIRECTIONAL;
+ if (scsi_dir == SCSI_DATA_WRITE)
+ return SBUS_DMA_TODEVICE;
+ if (scsi_dir == SCSI_DATA_READ)
+ return SBUS_DMA_FROMDEVICE;
+ return SBUS_DMA_NONE;
+}
+#endif
+#endif
+
+/*
+ * Some defs, in case these are not defined elsewhere.
+ */
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#define MAX_SCSI_DEVICE_CODE 14
+extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
+
+#ifdef DEBUG
+#define SCSI_TIMEOUT (5*HZ)
+#else
+#define SCSI_TIMEOUT (2*HZ)
+#endif
+
+/*
+ * Used for debugging the new queueing code. We want to make sure
+ * that the lock state is consistent with design. Only do this in
+ * the user space simulator.
+ */
+#define ASSERT_LOCK(_LOCK, _COUNT)
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USER_DEBUG)
+#undef ASSERT_LOCK
+#define ASSERT_LOCK(_LOCK,_COUNT) \
+ { if( (_LOCK)->lock != _COUNT ) \
+ panic("Lock count inconsistent %s %d\n", __FILE__, __LINE__); \
+ }
+#endif
+
+/*
+ * Use these to pick apart the status, message, host and driver bytes
+ * of a result word.
+ *
+ * These are set by:
+ *
+ * status_byte = set by the target device
+ * msg_byte = return status from the host adapter itself
+ * host_byte = set by the low-level driver to indicate status
+ * driver_byte = set by the mid-level
+ */
+#define status_byte(result) (((result) >> 1) & 0x1f)
+#define msg_byte(result) (((result) >> 8) & 0xff)
+#define host_byte(result) (((result) >> 16) & 0xff)
+#define driver_byte(result) (((result) >> 24) & 0xff)
+#define suggestion(result) (driver_byte(result) & SUGGEST_MASK)
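+
+/*
+ * Worked example: for result = (DID_ERROR << 16) | (CHECK_CONDITION << 1),
+ * host_byte(result) == DID_ERROR and status_byte(result) == CHECK_CONDITION
+ * (CHECK_CONDITION is the pre-shifted 0x01 value from <scsi/scsi.h>, which
+ * is why status_byte() shifts the raw status right by one).
+ */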
+
+#define sense_class(sense) (((sense) >> 4) & 0x7)
+#define sense_error(sense) ((sense) & 0xf)
+#define sense_valid(sense) ((sense) & 0x80)
+
+#define NEEDS_RETRY 0x2001
+#define SUCCESS 0x2002
+#define FAILED 0x2003
+#define QUEUED 0x2004
+#define SOFT_ERROR 0x2005
+#define ADD_TO_MLQUEUE 0x2006
+
+/*
+ * These are the values that scsi_cmd->state can take.
+ */
+#define SCSI_STATE_TIMEOUT 0x1000
+#define SCSI_STATE_FINISHED 0x1001
+#define SCSI_STATE_FAILED 0x1002
+#define SCSI_STATE_QUEUED 0x1003
+#define SCSI_STATE_UNUSED 0x1006
+#define SCSI_STATE_DISCONNECTING 0x1008
+#define SCSI_STATE_INITIALIZING 0x1009
+#define SCSI_STATE_BHQUEUE 0x100a
+#define SCSI_STATE_MLQUEUE 0x100b
+
+/*
+ * These are the values that the owner field can take.
+ * They are used as an indication of who the command belongs to.
+ */
+#define SCSI_OWNER_HIGHLEVEL 0x100
+#define SCSI_OWNER_MIDLEVEL 0x101
+#define SCSI_OWNER_LOWLEVEL 0x102
+#define SCSI_OWNER_ERROR_HANDLER 0x103
+#define SCSI_OWNER_BH_HANDLER 0x104
+#define SCSI_OWNER_NOBODY 0x105
+
+#define COMMAND_SIZE(opcode) scsi_command_size[((opcode) >> 5) & 7]
+
+#define IDENTIFY_BASE 0x80
+#define IDENTIFY(can_disconnect, lun) (IDENTIFY_BASE |\
+ ((can_disconnect) ? 0x40 : 0) |\
+ ((lun) & 0x07))
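+
+/*
+ * Worked examples: COMMAND_SIZE(READ_10) indexes opcode group 1
+ * (0x28 >> 5), giving a 10-byte CDB; IDENTIFY(1, 2) builds the identify
+ * message 0x80 | 0x40 | 0x02 == 0xc2 (disconnection allowed, LUN 2).
+ */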
+
+
+/*
+ * This defines the scsi logging feature. It is a means by which the
+ * user can select how much information they get about various goings on,
+ * and it can be really useful for fault tracing. The logging word is
+ * divided into 3-bit fields, each of which holds the loglevel for one
+ * event class. The division of things is somewhat arbitrary, and could
+ * be changed if it were really needed for any reason. The shift and bit
+ * counts below are the only place where this layout is specified. 3 bits
+ * is more than enough for a first go-around, since it gives 8 levels of
+ * logging (really 7, since 0 is always off). Cutting to 2 bits might be
+ * wise at some point.
+ */
+
+#define SCSI_LOG_ERROR_SHIFT 0
+#define SCSI_LOG_TIMEOUT_SHIFT 3
+#define SCSI_LOG_SCAN_SHIFT 6
+#define SCSI_LOG_MLQUEUE_SHIFT 9
+#define SCSI_LOG_MLCOMPLETE_SHIFT 12
+#define SCSI_LOG_LLQUEUE_SHIFT 15
+#define SCSI_LOG_LLCOMPLETE_SHIFT 18
+#define SCSI_LOG_HLQUEUE_SHIFT 21
+#define SCSI_LOG_HLCOMPLETE_SHIFT 24
+#define SCSI_LOG_IOCTL_SHIFT 27
+
+#define SCSI_LOG_ERROR_BITS 3
+#define SCSI_LOG_TIMEOUT_BITS 3
+#define SCSI_LOG_SCAN_BITS 3
+#define SCSI_LOG_MLQUEUE_BITS 3
+#define SCSI_LOG_MLCOMPLETE_BITS 3
+#define SCSI_LOG_LLQUEUE_BITS 3
+#define SCSI_LOG_LLCOMPLETE_BITS 3
+#define SCSI_LOG_HLQUEUE_BITS 3
+#define SCSI_LOG_HLCOMPLETE_BITS 3
+#define SCSI_LOG_IOCTL_BITS 3
+
+#ifdef CONFIG_SCSI_LOGGING
+
+#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) \
+{ \
+ unsigned int mask; \
+ \
+ mask = (1 << (BITS)) - 1; \
+ if( ((scsi_logging_level >> (SHIFT)) & mask) > (LEVEL) ) \
+ { \
+ (CMD); \
+ } \
+}
+
+#define SCSI_SET_LOGGING(SHIFT, BITS, LEVEL) \
+{ \
+ unsigned int mask; \
+ \
+ mask = ((1 << (BITS)) - 1) << SHIFT; \
+ scsi_logging_level = ((scsi_logging_level & ~mask) \
+ | ((LEVEL << SHIFT) & mask)); \
+}
+
+
+
+#else
+
+/*
+ * With no logging enabled, stub these out so they don't do anything.
+ */
+#define SCSI_SET_LOGGING(SHIFT, BITS, LEVEL)
+
+#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD)
+#endif
+
+/*
+ * These are the macros that are actually used throughout the code to
+ * log events. If logging isn't enabled, they are no-ops and will be
+ * completely absent from the user's code.
+ *
+ * The 'set' versions of the macros are really intended to only be called
+ * from the /proc filesystem, and in production kernels this will be about
+ * all that is ever used. It could be useful in a debugging environment to
+ * bump the logging level when certain strange events are detected, however.
+ */
+#define SCSI_LOG_ERROR_RECOVERY(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_ERROR_SHIFT, SCSI_LOG_ERROR_BITS, LEVEL,CMD);
+#define SCSI_LOG_TIMEOUT(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_TIMEOUT_SHIFT, SCSI_LOG_TIMEOUT_BITS, LEVEL,CMD);
+#define SCSI_LOG_SCAN_BUS(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_SCAN_SHIFT, SCSI_LOG_SCAN_BITS, LEVEL,CMD);
+#define SCSI_LOG_MLQUEUE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS, LEVEL,CMD);
+#define SCSI_LOG_MLCOMPLETE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS, LEVEL,CMD);
+#define SCSI_LOG_LLQUEUE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_LLQUEUE_SHIFT, SCSI_LOG_LLQUEUE_BITS, LEVEL,CMD);
+#define SCSI_LOG_LLCOMPLETE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_LLCOMPLETE_SHIFT, SCSI_LOG_LLCOMPLETE_BITS, LEVEL,CMD);
+#define SCSI_LOG_HLQUEUE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_HLQUEUE_SHIFT, SCSI_LOG_HLQUEUE_BITS, LEVEL,CMD);
+#define SCSI_LOG_HLCOMPLETE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_HLCOMPLETE_SHIFT, SCSI_LOG_HLCOMPLETE_BITS, LEVEL,CMD);
+#define SCSI_LOG_IOCTL(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_IOCTL_SHIFT, SCSI_LOG_IOCTL_BITS, LEVEL,CMD);
+
+
+#define SCSI_SET_ERROR_RECOVERY_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_ERROR_SHIFT, SCSI_LOG_ERROR_BITS, LEVEL);
+#define SCSI_SET_TIMEOUT_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_TIMEOUT_SHIFT, SCSI_LOG_TIMEOUT_BITS, LEVEL);
+#define SCSI_SET_SCAN_BUS_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_SCAN_SHIFT, SCSI_LOG_SCAN_BITS, LEVEL);
+#define SCSI_SET_MLQUEUE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS, LEVEL);
+#define SCSI_SET_MLCOMPLETE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS, LEVEL);
+#define SCSI_SET_LLQUEUE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_LLQUEUE_SHIFT, SCSI_LOG_LLQUEUE_BITS, LEVEL);
+#define SCSI_SET_LLCOMPLETE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_LLCOMPLETE_SHIFT, SCSI_LOG_LLCOMPLETE_BITS, LEVEL);
+#define SCSI_SET_HLQUEUE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_HLQUEUE_SHIFT, SCSI_LOG_HLQUEUE_BITS, LEVEL);
+#define SCSI_SET_HLCOMPLETE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_HLCOMPLETE_SHIFT, SCSI_LOG_HLCOMPLETE_BITS, LEVEL);
+#define SCSI_SET_IOCTL_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_IOCTL_SHIFT, SCSI_LOG_IOCTL_BITS, LEVEL);
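+
+/*
+ * Worked example: SCSI_SET_TIMEOUT_LOGGING(2) stores 2 in bits 3..5 of
+ * scsi_logging_level. SCSI_LOG_TIMEOUT(1, printk("late\n")) then fires,
+ * because the stored level (2) exceeds the requested level (1), while
+ * SCSI_LOG_TIMEOUT(2, ...) stays quiet -- SCSI_CHECK_LOGGING() only
+ * emits when the stored level is strictly greater than LEVEL.
+ */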
+
+/*
+ * The result status word is built up in the following format:
+ * the low byte is the status returned by the SCSI command,
+ * with vendor specific bits masked.
+ *
+ * The next byte is the message which followed the SCSI status.
+ * This allows a stos to be used, since the Intel is a little
+ * endian machine.
+ *
+ * The third byte is a host return code, which is one of the
+ * DID_* values below; the top byte holds the driver_byte and
+ * suggestion bits.
+ *
+ * I.e.
+ * lsb msb
+ * status msg host code driver
+ *
+ * Our errors returned by OUR driver, NOT SCSI message. Or'd with
+ * SCSI message passed back to driver <IF any>.
+ */
+
+
+#define DID_OK 0x00 /* NO error */
+#define DID_NO_CONNECT 0x01 /* Couldn't connect before timeout period */
+#define DID_BUS_BUSY 0x02 /* BUS stayed busy through time out period */
+#define DID_TIME_OUT 0x03 /* TIMED OUT for other reason */
+#define DID_BAD_TARGET 0x04 /* BAD target. */
+#define DID_ABORT 0x05 /* Told to abort for some other reason */
+#define DID_PARITY 0x06 /* Parity error */
+#define DID_ERROR 0x07 /* Internal error */
+#define DID_RESET 0x08 /* Reset by somebody. */
+#define DID_BAD_INTR 0x09 /* Got an interrupt we weren't expecting. */
+#define DID_PASSTHROUGH 0x0a /* Force command past mid-layer */
+#define DID_SOFT_ERROR 0x0b /* The low-level driver just wants a retry */
+#define DRIVER_OK 0x00 /* Driver status */
+
+/*
+ * These indicate the error that occurred, and what is available.
+ */
+
+#define DRIVER_BUSY 0x01
+#define DRIVER_SOFT 0x02
+#define DRIVER_MEDIA 0x03
+#define DRIVER_ERROR 0x04
+
+#define DRIVER_INVALID 0x05
+#define DRIVER_TIMEOUT 0x06
+#define DRIVER_HARD 0x07
+#define DRIVER_SENSE 0x08
+
+#define SUGGEST_RETRY 0x10
+#define SUGGEST_ABORT 0x20
+#define SUGGEST_REMAP 0x30
+#define SUGGEST_DIE 0x40
+#define SUGGEST_SENSE 0x80
+#define SUGGEST_IS_OK 0xff
+
+#define DRIVER_MASK 0x0f
+#define SUGGEST_MASK 0xf0
+
+#define MAX_COMMAND_SIZE 16
+#define SCSI_SENSE_BUFFERSIZE 64
+
+/*
+ * SCSI command sets
+ */
+
+#define SCSI_UNKNOWN 0
+#define SCSI_1 1
+#define SCSI_1_CCS 2
+#define SCSI_2 3
+#define SCSI_3 4
+
+/*
+ * Every SCSI command starts with a one byte OP-code.
+ * The next byte's high three bits are the LUN of the
+ * device. Any multi-byte quantities are stored high byte
+ * first, and may have a 5 bit MSB in the same byte
+ * as the LUN.
+ */
+
+/*
+ * As the scsi do command functions are intelligent, and may need to
+ * redo a command, we need to keep track of the last command
+ * executed on each one.
+ */
+
+#define WAS_RESET 0x01
+#define WAS_TIMEDOUT 0x02
+#define WAS_SENSE 0x04
+#define IS_RESETTING 0x08
+#define IS_ABORTING 0x10
+#define ASKED_FOR_SENSE 0x20
+#define SYNC_RESET 0x40
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+#include <asm/pgtable.h>
+#define CONTIGUOUS_BUFFERS(X,Y) \
+ (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data))
+#else
+#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data)
+#endif
+
+
+/*
+ * This is the crap from the old error handling code. We have it in a special
+ * place so that we can more easily delete it later on.
+ */
+#include "scsi_obsolete.h"
+
+/*
+ * Add some typedefs so that we can prototype a bunch of the functions.
+ */
+typedef struct scsi_device Scsi_Device;
+typedef struct scsi_cmnd Scsi_Cmnd;
+typedef struct scsi_request Scsi_Request;
+
+#define SCSI_CMND_MAGIC 0xE25C23A5
+#define SCSI_REQ_MAGIC 0x75F6D354
+
+/*
+ * Here is where we prototype most of the mid-layer.
+ */
+
+/*
+ * Initializes all SCSI devices. This scans all scsi busses.
+ */
+
+extern unsigned int scsi_logging_level; /* What do we log? */
+extern unsigned int scsi_dma_free_sectors; /* How much room do we have left */
+extern unsigned int scsi_need_isa_buffer; /* True if some devices need indirection
+ * buffers */
+extern volatile int in_scan_scsis;
+extern const unsigned char scsi_command_size[8];
+
+
+/*
+ * These are the error handling functions defined in scsi_error.c
+ */
+extern void scsi_times_out(Scsi_Cmnd * SCpnt);
+extern void scsi_add_timer(Scsi_Cmnd * SCset, int timeout,
+ void (*complete) (Scsi_Cmnd *));
+extern int scsi_delete_timer(Scsi_Cmnd * SCset);
+extern void scsi_error_handler(void *host);
+extern int scsi_sense_valid(Scsi_Cmnd *);
+extern int scsi_decide_disposition(Scsi_Cmnd * SCpnt);
+extern int scsi_block_when_processing_errors(Scsi_Device *);
+extern void scsi_sleep(int);
+
+/*
+ * Prototypes for functions in scsicam.c
+ */
+extern int scsi_partsize(struct buffer_head *bh, unsigned long capacity,
+ unsigned int *cyls, unsigned int *hds,
+ unsigned int *secs);
+
+/*
+ * Prototypes for functions in scsi_dma.c
+ */
+void scsi_resize_dma_pool(void);
+int scsi_init_minimal_dma_pool(void);
+void *scsi_malloc(unsigned int);
+int scsi_free(void *, unsigned int);
+
+/*
+ * Prototypes for functions in scsi_merge.c
+ */
+extern void recount_segments(Scsi_Cmnd * SCpnt);
+extern void initialize_merge_fn(Scsi_Device * SDpnt);
+
+/*
+ * Prototypes for functions in scsi_queue.c
+ */
+extern int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason);
+
+/*
+ * Prototypes for functions in scsi_lib.c
+ */
+extern int scsi_maybe_unblock_host(Scsi_Device * SDpnt);
+extern Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, int uptodate,
+ int sectors);
+extern struct Scsi_Device_Template *scsi_get_request_dev(struct request *);
+extern int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt);
+extern int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int);
+extern void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors,
+ int block_sectors);
+extern void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt);
+extern void scsi_request_fn(request_queue_t * q);
+extern int scsi_starvation_completion(Scsi_Device * SDpnt);
+
+/*
+ * Prototypes for functions in scsi.c
+ */
+extern int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt);
+extern void scsi_bottom_half_handler(void);
+extern void scsi_release_commandblocks(Scsi_Device * SDpnt);
+extern void scsi_build_commandblocks(Scsi_Device * SDpnt);
+extern void scsi_done(Scsi_Cmnd * SCpnt);
+extern void scsi_finish_command(Scsi_Cmnd *);
+extern int scsi_retry_command(Scsi_Cmnd *);
+extern Scsi_Cmnd *scsi_allocate_device(Scsi_Device *, int, int);
+extern void __scsi_release_command(Scsi_Cmnd *);
+extern void scsi_release_command(Scsi_Cmnd *);
+extern void scsi_do_cmd(Scsi_Cmnd *, const void *cmnd,
+ void *buffer, unsigned bufflen,
+ void (*done) (struct scsi_cmnd *),
+ int timeout, int retries);
+extern int scsi_dev_init(void);
+
+/*
+ * Newer request-based interfaces.
+ */
+extern Scsi_Request *scsi_allocate_request(Scsi_Device *);
+extern void scsi_release_request(Scsi_Request *);
+extern void scsi_wait_req(Scsi_Request *, const void *cmnd,
+ void *buffer, unsigned bufflen,
+ int timeout, int retries);
+
+extern void scsi_do_req(Scsi_Request *, const void *cmnd,
+ void *buffer, unsigned bufflen,
+ void (*done) (struct scsi_cmnd *),
+ int timeout, int retries);
+extern int scsi_insert_special_req(Scsi_Request * SRpnt, int);
+extern void scsi_init_cmd_from_req(Scsi_Cmnd *, Scsi_Request *);
+
+
+/*
+ * Prototypes for functions/data in hosts.c
+ */
+extern int max_scsi_hosts;
+
+/*
+ * Prototypes for functions in scsi_proc.c
+ */
+extern void proc_print_scsidevice(Scsi_Device *, char *, int *, int);
+extern struct proc_dir_entry *proc_scsi;
+
+/*
+ * Prototypes for functions in constants.c
+ */
+extern void print_command(unsigned char *);
+extern void print_sense(const char *, Scsi_Cmnd *);
+extern void print_req_sense(const char *, Scsi_Request *);
+extern void print_driverbyte(int scsiresult);
+extern void print_hostbyte(int scsiresult);
+extern void print_status (int status);
+
+/*
+ * The scsi_device struct contains what we know about each given scsi
+ * device.
+ *
+ * FIXME(eric) - one of the great regrets that I have is that I failed to define
+ * these structure elements as something like sdev_foo instead of foo. This would
+ * make it so much easier to grep through sources and so forth. I propose that
+ * all new elements that get added to these structures follow this convention.
+ * As time goes on and as people have the stomach for it, it should be possible to
+ * go back and retrofit at least some of the elements here with the prefix.
+ */
+
+struct scsi_device {
+/* private: */
+ /*
+ * This information is private to the scsi mid-layer. Wrapping it in a
+ * struct private is a way of marking it in a sort of C++ type of way.
+ */
+ struct scsi_device *next; /* Used for linked list */
+ struct scsi_device *prev; /* Used for linked list */
+#if 0
+ wait_queue_head_t scpnt_wait; /* Used to wait if
+ device is busy */
+#endif
+
+ struct Scsi_Host *host;
+ request_queue_t request_queue;
+ atomic_t device_active; /* commands checked out for device */
+ volatile unsigned short device_busy; /* commands actually active on low-level */
+ int (*scsi_init_io_fn) (Scsi_Cmnd *); /* Used to initialize
+ new request */
+ Scsi_Cmnd *device_queue; /* queue of SCSI Command structures */
+
+/* public: */
+ unsigned int id, lun, channel;
+
+ unsigned int manufacturer; /* Manufacturer of device, for using
+ * vendor-specific cmd's */
+ unsigned sector_size; /* size in bytes */
+
+ int attached; /* # of high level drivers attached to this */
+ int detected; /* Delta attached - don't use in drivers! */
+ int access_count; /* Count of open channels/mounts */
+
+ void *hostdata; /* available to low-level driver */
+#if 0
+ devfs_handle_t de; /* directory for the device */
+#endif
+ char type;
+ char scsi_level;
+ char vendor[8], model[16], rev[4];
+ unsigned char current_tag; /* current tag */
+ unsigned char sync_min_period; /* Not less than this period */
+ unsigned char sync_max_offset; /* Not greater than this offset */
+ unsigned char queue_depth; /* How deep a queue to use */
+
+ unsigned online:1;
+ unsigned writeable:1;
+ unsigned removable:1;
+ unsigned random:1;
+ unsigned has_cmdblocks:1;
+ unsigned changed:1; /* Data invalid due to media change */
+ unsigned busy:1; /* Used to prevent races */
+ unsigned lockable:1; /* Able to prevent media removal */
+ unsigned borken:1; /* Tell the Seagate driver to be
+ * painfully slow on this device */
+ unsigned tagged_supported:1; /* Supports SCSI-II tagged queuing */
+ unsigned tagged_queue:1; /* SCSI-II tagged queuing enabled */
+ unsigned disconnect:1; /* can disconnect */
+ unsigned soft_reset:1; /* Uses soft reset option */
+ unsigned sync:1; /* Negotiate for sync transfers */
+ unsigned wide:1; /* Negotiate for WIDE transfers */
+ unsigned single_lun:1; /* Indicates we should only allow I/O to
+ * one of the luns for the device at a
+ * time. */
+ unsigned was_reset:1; /* There was a bus reset on the bus for
+ * this device */
+ unsigned expecting_cc_ua:1; /* Expecting a CHECK_CONDITION/UNIT_ATTN
+ * because we did a bus reset. */
+ unsigned device_blocked:1; /* Device returned QUEUE_FULL. */
+ unsigned ten:1; /* support ten byte read / write */
+ unsigned remap:1; /* support remapping */
+ unsigned starved:1; /* unable to process commands because
+ host busy */
+
+ /* Flag to allow revalidate to succeed in sd_open */
+ int allow_revalidate;
+};
+
+
+/*
+ * The Scsi_Cmnd structure is used by scsi.c internally, and for communication
+ * with low level drivers that support multiple outstanding commands.
+ */
+typedef struct scsi_pointer {
+ char *ptr; /* data pointer */
+ int this_residual; /* left in this buffer */
+ struct scatterlist *buffer; /* which buffer */
+ int buffers_residual; /* how many buffers left */
+
+ dma_addr_t dma_handle;
+
+ volatile int Status;
+ volatile int Message;
+ volatile int have_data_in;
+ volatile int sent_command;
+ volatile int phase;
+} Scsi_Pointer;
+
+/*
+ * This is essentially a slimmed down version of Scsi_Cmnd. The point of
+ * having this is that requests that are injected into the queue as result
+ * of things like ioctls and character devices shouldn't be using a
+ * Scsi_Cmnd until such a time that the command is actually at the head
+ * of the queue and being sent to the driver.
+ */
+struct scsi_request {
+ int sr_magic;
+ int sr_result; /* Status code from lower level driver */
+ unsigned char sr_sense_buffer[SCSI_SENSE_BUFFERSIZE];
+ /* obtained by REQUEST SENSE when CHECK CONDITION is received
+ on original command (auto-sense) */
+
+ struct Scsi_Host *sr_host;
+ Scsi_Device *sr_device;
+ Scsi_Cmnd *sr_command;
+#define SMHHACK
+#ifdef SMHHACK
+ void *freeaddr;
+#endif
+ struct request sr_request; /* A copy of the command we are
+ working on */
+ unsigned sr_bufflen; /* Size of data buffer */
+ void *sr_buffer; /* Data buffer */
+ int sr_allowed;
+ unsigned char sr_data_direction;
+ unsigned char sr_cmd_len;
+ unsigned char sr_cmnd[MAX_COMMAND_SIZE];
+ void (*sr_done) (struct scsi_cmnd *); /* Mid-level done function */
+ int sr_timeout_per_command;
+ unsigned short sr_use_sg; /* Number of pieces of scatter-gather */
+ unsigned short sr_sglist_len; /* size of malloc'd scatter-gather list */
+ unsigned sr_underflow; /* Return error if less than
+ this amount is transferred */
+};
+
+/*
+ * FIXME(eric) - one of the great regrets that I have is that I failed to define
+ * these structure elements as something like sc_foo instead of foo. This would
+ * make it so much easier to grep through sources and so forth. I propose that
+ * all new elements that get added to these structures follow this convention.
+ * As time goes on and as people have the stomach for it, it should be possible to
+ * go back and retrofit at least some of the elements here with the prefix.
+ */
+struct scsi_cmnd {
+ int sc_magic;
+/* private: */
+ /*
+ * This information is private to the scsi mid-layer. Wrapping it in a
+ * struct private is a way of marking it in a sort of C++ type of way.
+ */
+ struct Scsi_Host *host;
+ unsigned short state;
+ unsigned short owner;
+ Scsi_Device *device;
+ Scsi_Request *sc_request;
+ struct scsi_cmnd *next;
+ struct scsi_cmnd *reset_chain;
+
+ int eh_state; /* Used for state tracking in error handlr */
+ void (*done) (struct scsi_cmnd *); /* Mid-level done function */
+ /*
+ A SCSI Command is assigned a nonzero serial_number when internal_cmnd
+ passes it to the driver's queue command function. The serial_number
+ is cleared when scsi_done is entered indicating that the command has
+ been completed. If a timeout occurs, the serial number at the moment
+ of timeout is copied into serial_number_at_timeout. By subsequently
+ comparing the serial_number and serial_number_at_timeout fields
+ during abort or reset processing, we can detect whether the command
+ has already completed. This also detects cases where the command has
+ completed and the SCSI Command structure has already been reused
+ for another command, so that we can avoid incorrectly aborting or
+ resetting the new command.
+ */
+
+ unsigned long serial_number;
+ unsigned long serial_number_at_timeout;
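+
+ /*
+ * Illustrative use (matching the abort/reset paths described above):
+ * if serial_number == 0, or
+ * serial_number != serial_number_at_timeout,
+ * the command completed (or was recycled) before the timeout handler
+ * ran, so it must not be aborted or reset.
+ */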
+
+ int retries;
+ int allowed;
+ int timeout_per_command;
+ int timeout_total;
+ int timeout;
+
+ /*
+ * We handle the timeout differently if it happens when a reset,
+ * abort, etc are in process.
+ */
+ unsigned volatile char internal_timeout;
+ struct scsi_cmnd *bh_next; /* To enumerate the commands waiting
+ to be processed. */
+
+/* public: */
+
+ unsigned int target;
+ unsigned int lun;
+ unsigned int channel;
+ unsigned char cmd_len;
+ unsigned char old_cmd_len;
+ unsigned char sc_data_direction;
+ unsigned char sc_old_data_direction;
+
+ /* These elements define the operation we are about to perform */
+ unsigned char cmnd[MAX_COMMAND_SIZE];
+ unsigned request_bufflen; /* Actual request size */
+
+ struct timer_list eh_timeout; /* Used to time out the command. */
+ void *request_buffer; /* Actual requested buffer */
+ void **bounce_buffers; /* Array of bounce buffers when using scatter-gather */
+
+ /* These elements define the operation we ultimately want to perform */
+ unsigned char data_cmnd[MAX_COMMAND_SIZE];
+ unsigned short old_use_sg; /* We save use_sg here when requesting
+ * sense info */
+ unsigned short use_sg; /* Number of pieces of scatter-gather */
+ unsigned short sglist_len; /* size of malloc'd scatter-gather list */
+ unsigned short abort_reason; /* If the mid-level code requests an
+ * abort, this is the reason. */
+ unsigned bufflen; /* Size of data buffer */
+ void *buffer; /* Data buffer */
+
+ unsigned underflow; /* Return error if less than
+ this amount is transferred */
+ unsigned old_underflow; /* save underflow here when reusing the
+ * command for error handling */
+
+ unsigned transfersize; /* How much we are guaranteed to
+ transfer with each SCSI transfer
+ (ie, between disconnect /
+ reconnects). Probably == sector
+ size */
+
+ int resid; /* Number of bytes requested to be
+ transferred less actual number
+ transferred (0 if not supported) */
+
+ struct request request; /* A copy of the command we are
+ working on */
+
+ unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE]; /* obtained by REQUEST SENSE
+ * when CHECK CONDITION is
+ * received on original command
+ * (auto-sense) */
+
+ unsigned flags;
+
+ /*
+ * Used to indicate that a command which has timed out also
+ * completed normally. Typically the completion function will
+ * do nothing but set this flag in this instance because the
+ * timeout handler is already running.
+ */
+ unsigned done_late:1;
+
+ /* Low-level done function - can be used by low-level driver to point
+ * to completion function. Not used by mid/upper level code. */
+ void (*scsi_done) (struct scsi_cmnd *);
+
+ /*
+ * The following fields can be written to by the host specific code.
+ * Everything else should be left alone.
+ */
+
+ Scsi_Pointer SCp; /* Scratchpad used by some host adapters */
+
+ unsigned char *host_scribble; /* The host adapter is allowed to
+ * call scsi_malloc and get some memory
+ * and hang it here. The host adapter
+ * is also expected to call scsi_free
+ * to release this memory. (The memory
+ * obtained by scsi_malloc is guaranteed
+ * to be at an address < 16Mb). */
+
+ int result; /* Status code from lower level driver */
+
+ unsigned char tag; /* SCSI-II queued command tag */
+ unsigned long pid; /* Process ID, starts at 0 */
+};
+
+/*
+ * Flag bit for the internal_timeout array
+ */
+#define NORMAL_TIMEOUT 0
+
+/*
+ * Definitions and prototypes used for scsi mid-level queue.
+ */
+#define SCSI_MLQUEUE_HOST_BUSY 0x1055
+#define SCSI_MLQUEUE_DEVICE_BUSY 0x1056
+
+#if 0
+#define SCSI_SLEEP(QUEUE, CONDITION) { \
+ if (CONDITION) { \
+ DECLARE_WAITQUEUE(wait, current); \
+ add_wait_queue(QUEUE, &wait); \
+ for(;;) { \
+ set_current_state(TASK_UNINTERRUPTIBLE); \
+ if (CONDITION) { \
+ if (in_interrupt()) \
+ panic("scsi: trying to call schedule() in interrupt" \
+ ", file %s, line %d.\n", __FILE__, __LINE__); \
+ schedule(); \
+ } \
+ else \
+ break; \
+ } \
+ remove_wait_queue(QUEUE, &wait);\
+ current->state = TASK_RUNNING; \
+ }; }
+#else
+#define SCSI_SLEEP(QUEUE, CONDITION) { if (CONDITION) { printk("SCSI_SLEEP!\n"); BUG(); } }
+#endif
+
+
+
+
+/*
+ * old style reset request from external source
+ * (private to sg.c and scsi_error.c, supplied by scsi_obsolete.c)
+ */
+#define SCSI_TRY_RESET_DEVICE 1
+#define SCSI_TRY_RESET_BUS 2
+#define SCSI_TRY_RESET_HOST 3
+
+extern int scsi_reset_provider(Scsi_Device *, int);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_dma.c b/xen/drivers/scsi/scsi_dma.c
new file mode 100644
index 0000000000..94c2118da0
--- /dev/null
+++ b/xen/drivers/scsi/scsi_dma.c
@@ -0,0 +1,455 @@
+/*
+ * scsi_dma.c Copyright (C) 2000 Eric Youngdale
+ *
+ * mid-level SCSI DMA bounce buffer allocator
+ *
+ */
+
+#define __NO_VERSION__
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/blk.h>
+
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
+
+/*
+ * PAGE_SIZE must be a multiple of the sector size (512). True
+ * for all reasonably recent architectures (even the VAX...).
+ */
+#define SECTOR_SIZE 512
+#define SECTORS_PER_PAGE (PAGE_SIZE/SECTOR_SIZE)
+
+#if SECTORS_PER_PAGE <= 8
+typedef unsigned char FreeSectorBitmap;
+#elif SECTORS_PER_PAGE <= 32
+typedef unsigned int FreeSectorBitmap;
+#else
+#error You lose.
+#endif
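+
+/*
+ * Example: with 4KB pages SECTORS_PER_PAGE is 8, so FreeSectorBitmap is
+ * an unsigned char and each DMA page is tracked by a single byte, one
+ * bit per 512-byte sector.
+ */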
+
+/*
+ * Used for access to internal allocator used for DMA safe buffers.
+ */
+static spinlock_t allocator_request_lock = SPIN_LOCK_UNLOCKED;
+
+static FreeSectorBitmap *dma_malloc_freelist = NULL;
+static int need_isa_bounce_buffers;
+static unsigned int dma_sectors = 0;
+unsigned int scsi_dma_free_sectors = 0;
+unsigned int scsi_need_isa_buffer = 0;
+static unsigned char **dma_malloc_pages = NULL;
+
+/*
+ * Function: scsi_malloc
+ *
+ * Purpose: Allocate memory from the DMA-safe pool.
+ *
+ * Arguments: len - amount of memory we need.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Pointer to memory block.
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ * This function can only allocate in units of sectors
+ * (i.e. 512 bytes).
+ *
+ * We cannot use the normal system allocator because we need
+ * to be able to guarantee that we can process a complete disk
+ * I/O request without touching the system allocator. Think
+ * about it - if the system were heavily swapping, and tried to
+ * write out a block of memory to disk, and the SCSI code needed
+ * to allocate more memory in order to be able to write the
+ * data to disk, you would wedge the system.
+ */
+void *scsi_malloc(unsigned int len)
+{
+ unsigned int nbits, mask;
+ unsigned long flags;
+
+ int i, j;
+ if (len % SECTOR_SIZE != 0 || len > PAGE_SIZE)
+ return NULL;
+
+ nbits = len >> 9;
+ mask = (1 << nbits) - 1;
+
+ spin_lock_irqsave(&allocator_request_lock, flags);
+
+ for (i = 0; i < dma_sectors / SECTORS_PER_PAGE; i++)
+ for (j = 0; j <= SECTORS_PER_PAGE - nbits; j++) {
+ if ((dma_malloc_freelist[i] & (mask << j)) == 0) {
+ dma_malloc_freelist[i] |= (mask << j);
+ scsi_dma_free_sectors -= nbits;
+#ifdef DEBUG
+ SCSI_LOG_MLQUEUE(3, printk("SMalloc: %d %p [From:%p]\n", len, dma_malloc_pages[i] + (j << 9), __builtin_return_address(0)));
+ printk("SMalloc: %d %p [From:%p]\n", len, dma_malloc_pages[i] + (j << 9), __builtin_return_address(0));
+#endif
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return (void *) ((unsigned long) dma_malloc_pages[i] + (j << 9));
+ }
+ }
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return NULL; /* Nope. No more */
+}
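+
+#if 0
+/*
+ * Usage sketch (illustrative, compiled out): lengths must be a multiple
+ * of SECTOR_SIZE and at most PAGE_SIZE, and the free must quote the same
+ * length as the matching allocation.
+ */
+static void example_dma_buffer_use(void)
+{
+ void *buf = scsi_malloc(2 * SECTOR_SIZE); /* two 512-byte sectors */
+ if (buf != NULL) {
+ /* ... use buf for DMA ... */
+ scsi_free(buf, 2 * SECTOR_SIZE); /* length must match */
+ }
+}
+#endif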
+
+/*
+ * Function: scsi_free
+ *
+ * Purpose: Free memory into the DMA-safe pool.
+ *
+ * Arguments: ptr - data block we are freeing.
+ * len - size of block we are freeing.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: 0 on success; panics on an invalid free.
+ *
+ * Notes: This function *must* only be used to free memory
+ * allocated from scsi_malloc().
+ *
+ * Prior to the new queue code, this function was not SMP-safe.
+ * This function can only free in units of sectors
+ * (i.e. 512 bytes).
+ */
+int scsi_free(void *obj, unsigned int len)
+{
+ unsigned int page, sector, nbits, mask;
+ unsigned long flags;
+
+#ifdef DEBUG
+ unsigned long ret = 0;
+
+#ifdef __mips__
+ __asm__ __volatile__("move\t%0,$31":"=r"(ret));
+#else
+ ret = (unsigned long) __builtin_return_address(0);
+#endif
+ printk("scsi_free %p %d\n", obj, len);
+ SCSI_LOG_MLQUEUE(3, printk("SFree: %p %d\n", obj, len));
+#endif
+
+ spin_lock_irqsave(&allocator_request_lock, flags);
+
+ for (page = 0; page < dma_sectors / SECTORS_PER_PAGE; page++) {
+ unsigned long page_addr = (unsigned long) dma_malloc_pages[page];
+ if ((unsigned long) obj >= page_addr &&
+ (unsigned long) obj < page_addr + PAGE_SIZE) {
+ sector = (((unsigned long) obj) - page_addr) >> 9;
+
+ nbits = len >> 9;
+ mask = (1 << nbits) - 1;
+
+ if (sector + nbits > SECTORS_PER_PAGE)
+ panic("scsi_free:Bad memory alignment");
+
+ if ((dma_malloc_freelist[page] &
+ (mask << sector)) != (mask << sector)) {
+#ifdef DEBUG
+ printk("scsi_free(obj=%p, len=%d) called from %08lx\n",
+ obj, len, ret);
+#endif
+ panic("scsi_free:Trying to free unused memory");
+ }
+ scsi_dma_free_sectors += nbits;
+ dma_malloc_freelist[page] &= ~(mask << sector);
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return 0;
+ }
+ }
+ panic("scsi_free:Bad offset");
+ return -1;
+}
+
+
+/*
+ * Function: scsi_resize_dma_pool
+ *
+ * Purpose: Ensure that the DMA pool is sufficiently large to be
+ * able to guarantee that we can always process I/O requests
+ * without calling the system allocator.
+ *
+ * Arguments: None.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Nothing
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ * Go through the device list and recompute the most appropriate
+ * size for the dma pool. Then grab more memory (as required).
+ */
+void scsi_resize_dma_pool(void)
+{
+ int i, k;
+ unsigned long size;
+ unsigned long flags;
+ struct Scsi_Host *shpnt;
+ struct Scsi_Host *host = NULL;
+ Scsi_Device *SDpnt;
+ FreeSectorBitmap *new_dma_malloc_freelist = NULL;
+ unsigned int new_dma_sectors = 0;
+ unsigned int new_need_isa_buffer = 0;
+ unsigned char **new_dma_malloc_pages = NULL;
+ int out_of_space = 0;
+
+ spin_lock_irqsave(&allocator_request_lock, flags);
+
+ if (!scsi_hostlist) {
+ /*
+ * Free up the DMA pool.
+ */
+ if (scsi_dma_free_sectors != dma_sectors)
+ panic("SCSI DMA pool memory leak %d %d\n",
+ scsi_dma_free_sectors, dma_sectors);
+
+ for (i = 0; i < dma_sectors / SECTORS_PER_PAGE; i++)
+ free_pages((unsigned long) dma_malloc_pages[i], 0);
+ if (dma_malloc_pages)
+ kfree((char *) dma_malloc_pages);
+ dma_malloc_pages = NULL;
+ if (dma_malloc_freelist)
+ kfree((char *) dma_malloc_freelist);
+ dma_malloc_freelist = NULL;
+ dma_sectors = 0;
+ scsi_dma_free_sectors = 0;
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return;
+ }
+ /* Next, check to see if we need to extend the DMA buffer pool */
+
+ new_dma_sectors = 2 * SECTORS_PER_PAGE; /* Base value we use */
+
+#if 0
+ if (__pa(high_memory) - 1 > ISA_DMA_THRESHOLD)
+ need_isa_bounce_buffers = 1;
+ else
+#endif
+ need_isa_bounce_buffers = 0;
+
+ if (scsi_devicelist)
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next)
+ new_dma_sectors += SECTORS_PER_PAGE; /* Increment for each host */
+
+ for (host = scsi_hostlist; host; host = host->next) {
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ /*
+ * sd and sr drivers allocate scatterlists.
+ * sr drivers may allocate for each command 1x2048 or 2x1024 extra
+ * buffers for 2k sector size and 1k fs.
+ * sg driver allocates buffers < 4k.
+ * st driver does not need buffers from the dma pool.
+ * estimate 4k buffer/command for devices of unknown type (should panic).
+ */
+ if (SDpnt->type == TYPE_WORM || SDpnt->type == TYPE_ROM ||
+ SDpnt->type == TYPE_DISK || SDpnt->type == TYPE_MOD) {
+ int nents = host->sg_tablesize;
+#ifdef DMA_CHUNK_SIZE
+ /* If the architecture does DMA sg merging, make sure
+ we count with at least 64 entries even for HBAs
+ which handle very few sg entries. */
+ if (nents < 64) nents = 64;
+#endif
+ new_dma_sectors += ((nents *
+ sizeof(struct scatterlist) + 511) >> 9) *
+ SDpnt->queue_depth;
+ if (SDpnt->type == TYPE_WORM || SDpnt->type == TYPE_ROM)
+ new_dma_sectors += (2048 >> 9) * SDpnt->queue_depth;
+ } else if (SDpnt->type == TYPE_SCANNER ||
+ SDpnt->type == TYPE_PRINTER ||
+ SDpnt->type == TYPE_PROCESSOR ||
+ SDpnt->type == TYPE_COMM ||
+ SDpnt->type == TYPE_MEDIUM_CHANGER ||
+ SDpnt->type == TYPE_ENCLOSURE) {
+ new_dma_sectors += (4096 >> 9) * SDpnt->queue_depth;
+ } else {
+ if (SDpnt->type != TYPE_TAPE) {
+ printk("resize_dma_pool: unknown device type %d\n", SDpnt->type);
+ new_dma_sectors += (4096 >> 9) * SDpnt->queue_depth;
+ }
+ }
+
+ if (host->unchecked_isa_dma &&
+ need_isa_bounce_buffers &&
+ SDpnt->type != TYPE_TAPE) {
+ new_dma_sectors += (PAGE_SIZE >> 9) * host->sg_tablesize *
+ SDpnt->queue_depth;
+ new_need_isa_buffer++;
+ }
+ }
+ }
+
+#ifdef DEBUG_INIT
+ printk("resize_dma_pool: needed dma sectors = %d\n", new_dma_sectors);
+#endif
+
+ /* round up to a multiple of 16 sectors and mask to at most 0xfff0
+ * sectors (~32MB); e.g. a request for 17 sectors becomes 32: */
+ new_dma_sectors = (new_dma_sectors + 15) & 0xfff0;
+
+ /*
+ * We never shrink the buffers - this leads to
+ * race conditions that I would rather not even think
+ * about right now.
+ */
+#if 0 /* Why do this? No gain and risks out_of_space */
+ if (new_dma_sectors < dma_sectors)
+ new_dma_sectors = dma_sectors;
+#endif
+ if (new_dma_sectors <= dma_sectors) {
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return; /* best to quit while we are in front */
+ }
+
+ for (k = 0; k < 20; ++k) { /* just in case */
+ out_of_space = 0;
+ size = (new_dma_sectors / SECTORS_PER_PAGE) *
+ sizeof(FreeSectorBitmap);
+ new_dma_malloc_freelist = (FreeSectorBitmap *)
+ kmalloc(size, GFP_ATOMIC);
+ if (new_dma_malloc_freelist) {
+ memset(new_dma_malloc_freelist, 0, size);
+ size = (new_dma_sectors / SECTORS_PER_PAGE) *
+ sizeof(*new_dma_malloc_pages);
+ new_dma_malloc_pages = (unsigned char **)
+ kmalloc(size, GFP_ATOMIC);
+ if (!new_dma_malloc_pages) {
+ size = (new_dma_sectors / SECTORS_PER_PAGE) *
+ sizeof(FreeSectorBitmap);
+ kfree((char *) new_dma_malloc_freelist);
+ out_of_space = 1;
+ } else {
+ memset(new_dma_malloc_pages, 0, size);
+ }
+ } else
+ out_of_space = 1;
+
+ if ((!out_of_space) && (new_dma_sectors > dma_sectors)) {
+ for (i = dma_sectors / SECTORS_PER_PAGE;
+ i < new_dma_sectors / SECTORS_PER_PAGE; i++) {
+ new_dma_malloc_pages[i] = (unsigned char *)
+ __get_free_pages(GFP_ATOMIC | GFP_DMA, 0);
+ if (!new_dma_malloc_pages[i])
+ break;
+ }
+ if (i != new_dma_sectors / SECTORS_PER_PAGE) { /* clean up */
+ int k = i;
+
+ out_of_space = 1;
+ for (i = 0; i < k; ++i)
+ free_pages((unsigned long) new_dma_malloc_pages[i], 0);
+ }
+ }
+ if (out_of_space) { /* try scaling down new_dma_sectors request */
+ printk("scsi::resize_dma_pool: WARNING, dma_sectors=%u, "
+ "wanted=%u, scaling\n", dma_sectors, new_dma_sectors);
+ if (new_dma_sectors < (8 * SECTORS_PER_PAGE))
+ break; /* pretty well hopeless ... */
+ new_dma_sectors = (new_dma_sectors * 3) / 4;
+ new_dma_sectors = (new_dma_sectors + 15) & 0xfff0;
+ if (new_dma_sectors <= dma_sectors)
+ break; /* stick with what we have got */
+ } else
+ break; /* found space ... */
+ } /* end of for loop */
+ if (out_of_space) {
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ scsi_need_isa_buffer = new_need_isa_buffer; /* some useful info */
+ printk(" WARNING, not enough memory, pool not expanded\n");
+ return;
+ }
+ /* When we dick with the actual DMA list, we need to
+ * protect things
+ */
+ if (dma_malloc_freelist) {
+ size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(FreeSectorBitmap);
+ memcpy(new_dma_malloc_freelist, dma_malloc_freelist, size);
+ kfree((char *) dma_malloc_freelist);
+ }
+ dma_malloc_freelist = new_dma_malloc_freelist;
+
+ if (dma_malloc_pages) {
+ size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(*dma_malloc_pages);
+ memcpy(new_dma_malloc_pages, dma_malloc_pages, size);
+ kfree((char *) dma_malloc_pages);
+ }
+ scsi_dma_free_sectors += new_dma_sectors - dma_sectors;
+ dma_malloc_pages = new_dma_malloc_pages;
+ dma_sectors = new_dma_sectors;
+ scsi_need_isa_buffer = new_need_isa_buffer;
+
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+
+#ifdef DEBUG_INIT
+ printk("resize_dma_pool: dma free sectors = %d\n", scsi_dma_free_sectors);
+ printk("resize_dma_pool: dma sectors = %d\n", dma_sectors);
+ printk("resize_dma_pool: need isa buffers = %d\n", scsi_need_isa_buffer);
+#endif
+}
+
+/*
+ * Function: scsi_init_minimal_dma_pool
+ *
+ * Purpose: Allocate a minimal (1-page) DMA pool.
+ *
+ * Arguments: None.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: 0 on success, 1 if the pool could not be allocated.
+ *
+ * Notes:
+ */
+int scsi_init_minimal_dma_pool(void)
+{
+ unsigned long size;
+ unsigned long flags;
+ int has_space = 0;
+
+ spin_lock_irqsave(&allocator_request_lock, flags);
+
+ dma_sectors = PAGE_SIZE / SECTOR_SIZE;
+ scsi_dma_free_sectors = dma_sectors;
+ /*
+ * Set up a minimal DMA buffer list - this will be used during scan_scsis
+ * in some cases.
+ */
+
+ /* One bit per sector to indicate free/busy */
+ size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(FreeSectorBitmap);
+ dma_malloc_freelist = (FreeSectorBitmap *)
+ kmalloc(size, GFP_ATOMIC);
+ if (dma_malloc_freelist) {
+ memset(dma_malloc_freelist, 0, size);
+ /* One pointer per page for the page list */
+ size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(*dma_malloc_pages);
+ dma_malloc_pages = (unsigned char **) kmalloc(size, GFP_ATOMIC);
+ if (dma_malloc_pages) {
+ memset(dma_malloc_pages, 0, size);
+ dma_malloc_pages[0] = (unsigned char *)
+ __get_free_pages(GFP_ATOMIC | GFP_DMA, 0);
+ if (dma_malloc_pages[0])
+ has_space = 1;
+ }
+ }
+ if (!has_space) {
+ if (dma_malloc_freelist) {
+ kfree((char *) dma_malloc_freelist);
+ if (dma_malloc_pages)
+ kfree((char *) dma_malloc_pages);
+ }
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ printk("scsi::init_module: failed, out of memory\n");
+ return 1;
+ }
+
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return 0;
+}
diff --git a/xen/drivers/scsi/scsi_error.c b/xen/drivers/scsi/scsi_error.c
new file mode 100644
index 0000000000..6c043937be
--- /dev/null
+++ b/xen/drivers/scsi/scsi_error.c
@@ -0,0 +1,2063 @@
+/*
+ * scsi_error.c Copyright (C) 1997 Eric Youngdale
+ *
+ * SCSI error/timeout handling
+ * Initial versions: Eric Youngdale. Based upon conversations with
+ * Leonard Zubkoff and David Miller at Linux Expo,
+ * ideas originating from all over the place.
+ *
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/*#include <xeno/string.h>*/
+#include <xeno/slab.h>
+#include <xeno/ioport.h>
+#include <xeno/kernel.h>
+/*#include <xeno/stat.h>*/
+#include <xeno/blk.h>
+#include <xeno/interrupt.h>
+#include <xeno/delay.h>
+/*#include <xeno/smp_lock.h>*/
+
+#define __KERNEL_SYSCALLS__
+
+/*#include <xeno/unistd.h>*/
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+/*
+ * We must always allow SHUTDOWN_SIGS. Even if we are not a module,
+ * the host drivers that we are using may be loaded as modules, and
+ * when we unload these, we need to ensure that the error handler thread
+ * can be shut down.
+ *
+ * Note - when we unload a module, we send a SIGHUP. We mustn't
+ * enable SIGTERM, as this is how the init shuts things down when you
+ * go to single-user mode. For that matter, init also sends SIGKILL,
+ * so we mustn't enable that one either. We use SIGHUP instead. Other
+ * options would be SIGPWR, I suppose.
+ */
+#define SHUTDOWN_SIGS (sigmask(SIGHUP))
+
+#ifdef DEBUG
+#define SENSE_TIMEOUT SCSI_TIMEOUT
+#define ABORT_TIMEOUT SCSI_TIMEOUT
+#define RESET_TIMEOUT SCSI_TIMEOUT
+#else
+#define SENSE_TIMEOUT (10*HZ)
+#define RESET_TIMEOUT (2*HZ)
+#define ABORT_TIMEOUT (15*HZ)
+#endif
+
+#define STATIC
+
+/*
+ * These should *probably* be handled by the host itself.
+ * Since it is allowed to sleep, it probably should.
+ */
+#define BUS_RESET_SETTLE_TIME 5*HZ
+#define HOST_RESET_SETTLE_TIME 10*HZ
+
+
+static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_error.c,v 1.10 1997/12/08 04:50:35 eric Exp $";
+
+STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt);
+STATIC int scsi_request_sense(Scsi_Cmnd *);
+STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout);
+STATIC int scsi_try_to_abort_command(Scsi_Cmnd *, int);
+STATIC int scsi_test_unit_ready(Scsi_Cmnd *);
+STATIC int scsi_try_bus_device_reset(Scsi_Cmnd *, int timeout);
+STATIC int scsi_try_bus_reset(Scsi_Cmnd *);
+STATIC int scsi_try_host_reset(Scsi_Cmnd *);
+STATIC int scsi_unit_is_ready(Scsi_Cmnd *);
+STATIC void scsi_eh_action_done(Scsi_Cmnd *, int);
+STATIC int scsi_eh_retry_command(Scsi_Cmnd *);
+STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt);
+STATIC void scsi_restart_operations(struct Scsi_Host *);
+STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt);
+
+
+/*
+ * Function: scsi_add_timer()
+ *
+ * Purpose: Start timeout timer for a single scsi command.
+ *
+ * Arguments: SCset - command that is about to start running.
+ * timeout - amount of time to allow this command to run.
+ * complete - timeout function to call if timer isn't
+ * canceled.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This should be turned into an inline function.
+ *
+ * More Notes: Each scsi command has its own timer, and as it is added to
+ * the queue, we set up the timer. When the command completes,
+ * we cancel the timer. Pretty simple, really, especially
+ * compared to the old way of handling this crap.
+ */
+void scsi_add_timer(Scsi_Cmnd * SCset,
+ int timeout,
+ void (*complete) (Scsi_Cmnd *))
+{
+
+ /*
+ * If the clock was already running for this command, then
+ * first delete the timer. The timer handling code gets rather
+ * confused if we don't do this.
+ */
+ if (SCset->eh_timeout.function != NULL) {
+ del_timer(&SCset->eh_timeout);
+ }
+ SCset->eh_timeout.data = (unsigned long) SCset;
+ SCset->eh_timeout.expires = jiffies + timeout;
+ SCset->eh_timeout.function = (void (*)(unsigned long)) complete;
+
+ SCset->done_late = 0;
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at %d (%p)\n", SCset, timeout, complete));
+
+ add_timer(&SCset->eh_timeout);
+
+}
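+
+/*
+ * Illustrative pairing: the mid-layer arms the timer when a command is
+ * dispatched, e.g. scsi_add_timer(SCpnt, SCpnt->timeout_per_command,
+ * scsi_times_out), and the completion path calls scsi_delete_timer();
+ * a return of 0 there means the timeout has already fired and the
+ * completion must defer to it (see scsi_eh_done() below).
+ */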
+
+/*
+ * Function: scsi_delete_timer()
+ *
+ * Purpose: Delete/cancel timer for a given function.
+ *
+ * Arguments: SCset - command that we are canceling timer for.
+ *
+ * Returns: 1 if we were able to detach the timer. 0 if we
+ * blew it, and the timer function has already started
+ * to run.
+ *
+ * Notes: This should be turned into an inline function.
+ */
+int scsi_delete_timer(Scsi_Cmnd * SCset)
+{
+ int rtn;
+
+ rtn = del_timer(&SCset->eh_timeout);
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p %d\n", SCset, rtn));
+
+ SCset->eh_timeout.data = (unsigned long) NULL;
+ SCset->eh_timeout.function = NULL;
+
+ return rtn;
+}
+
+/*
+ * Function: scsi_times_out()
+ *
+ * Purpose: Timeout function for normal scsi commands.
+ *
+ * Arguments: SCpnt - command that is timing out.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: We do not need to lock this. There is the potential for
+ * a race only in that the normal completion handling might
+ * run, but if the normal completion function determines
+ * that the timer has already fired, then it mustn't do
+ * anything.
+ */
+void scsi_times_out(Scsi_Cmnd * SCpnt)
+{
+ /*
+ * Notify the low-level code that this operation failed and we are
+ * repossessing the command.
+ */
+#ifdef ERIC_neverdef
+ /*
+ * FIXME(eric)
+ * Allow the host adapter to push a queue ordering tag
+ * out to the bus to force the command in question to complete.
+ * If the host wants to do this, then we just restart the timer
+ * for the command. Before we really do this, some real thought
+ * as to the optimum way to handle this should be done. We *do*
+ * need to force ordering every so often to ensure that all requests
+ * do eventually complete, but I am not sure if this is the best way
+ * to actually go about it.
+ *
+ * Better yet, force a sync here, but don't block since we are in an
+ * interrupt.
+ */
+ if (SCpnt->host->hostt->eh_ordered_queue_tag) {
+ if ((*SCpnt->host->hostt->eh_ordered_queue_tag) (SCpnt)) {
+ scsi_add_timer(SCpnt, SCpnt->internal_timeout,
+ scsi_times_out);
+ return;
+ }
+ }
+ /*
+ * FIXME(eric) - add a second special interface to handle this
+ * case. Ideally that interface can also be used to request
+ * a queue ordering tag.
+ */
+ if (SCpnt->host->can_queue) {
+ SCpnt->host->hostt->queuecommand(SCpnt, NULL);
+ }
+#endif
+
+ /* Set the serial_number_at_timeout to the current serial_number */
+ SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+ SCpnt->eh_state = FAILED;
+ SCpnt->state = SCSI_STATE_TIMEOUT;
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+
+ SCpnt->host->in_recovery = 1;
+ SCpnt->host->host_failed++;
+
+ SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d failed=%d\n",
+ atomic_read(&SCpnt->host->host_active),
+ SCpnt->host->host_busy,
+ SCpnt->host->host_failed));
+
+#if 0
+ /*
+ * If the host is having troubles, then look to see if this was the last
+ * command that might have failed. If so, wake up the error handler.
+ */
+ if( SCpnt->host->eh_wait == NULL ) {
+ panic("Error handler thread not present at %p %p %s %d",
+ SCpnt, SCpnt->host, __FILE__, __LINE__);
+ }
+ if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
+ up(SCpnt->host->eh_wait);
+ }
+#endif
+}
+
+/*
+ * Function scsi_block_when_processing_errors
+ *
+ * Purpose: Prevent more commands from being queued while error recovery
+ * is taking place.
+ *
+ * Arguments: SDpnt - device on which we are performing recovery.
+ *
+ * Returns: FALSE The device was taken offline by error recovery.
+ * TRUE OK to proceed.
+ *
+ * Notes: We block until the host is out of error recovery, and then
+ * check to see whether the host or the device is offline.
+ */
+int scsi_block_when_processing_errors(Scsi_Device * SDpnt)
+{
+
+ SCSI_SLEEP(&SDpnt->host->host_wait, SDpnt->host->in_recovery);
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n", SDpnt->online));
+
+ return SDpnt->online;
+}
+
+/*
+ * Function: scsi_eh_times_out()
+ *
+ * Purpose: Timeout function for error handling.
+ *
+ * Arguments: SCpnt - command that is timing out.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: During error handling, the kernel thread will be sleeping
+ * waiting for some action to complete on the device. Our only
+ * job is to record that it timed out, and to wake up the
+ * thread.
+ */
+STATIC
+void scsi_eh_times_out(Scsi_Cmnd * SCpnt)
+{
+ SCpnt->eh_state = SCSI_STATE_TIMEOUT;
+ SCSI_LOG_ERROR_RECOVERY(5, printk("In scsi_eh_times_out %p\n", SCpnt));
+
+#if 0
+ if (SCpnt->host->eh_action != NULL)
+ up(SCpnt->host->eh_action);
+ else
+#endif
+ printk("Missing scsi error handler thread\n");
+}
+
+
+/*
+ * Function: scsi_eh_done()
+ *
+ * Purpose: Completion function for error handling.
+ *
+ * Arguments: SCpnt - command that is timing out.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: During error handling, the kernel thread will be sleeping
+ * waiting for some action to complete on the device. Our only
+ * job is to record that the action completed, and to wake up the
+ * thread.
+ */
+STATIC
+void scsi_eh_done(Scsi_Cmnd * SCpnt)
+{
+ int rtn;
+
+ /*
+ * If the timeout handler is already running, then just set the
+ * flag which says we finished late, and return. We have no
+ * way of stopping the timeout handler from running, so we must
+ * always defer to it.
+ */
+ rtn = del_timer(&SCpnt->eh_timeout);
+ if (!rtn) {
+ SCpnt->done_late = 1;
+ return;
+ }
+
+ SCpnt->request.rq_status = RQ_SCSI_DONE;
+
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+ SCpnt->eh_state = SUCCESS;
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("In eh_done %p result:%x\n", SCpnt,
+ SCpnt->result));
+
+#if 0
+ if (SCpnt->host->eh_action != NULL)
+ up(SCpnt->host->eh_action);
+#endif
+}
+
+/*
+ * Function: scsi_eh_action_done()
+ *
+ * Purpose: Completion function for error handling.
+ *
+ * Arguments: SCpnt - command that is timing out.
+ * answer - boolean that indicates whether operation succeeded.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: This callback is only used for abort and reset operations.
+ */
+STATIC
+void scsi_eh_action_done(Scsi_Cmnd * SCpnt, int answer)
+{
+ SCpnt->request.rq_status = RQ_SCSI_DONE;
+
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+ SCpnt->eh_state = (answer ? SUCCESS : FAILED);
+#if 0
+ if (SCpnt->host->eh_action != NULL)
+ up(SCpnt->host->eh_action);
+#endif
+}
+
+/*
+ * Function: scsi_sense_valid()
+ *
+ * Purpose: Determine whether a host has automatically obtained sense
+ * information or not. If we have it, then give a recommendation
+ * as to what we should do next.
+ */
+int scsi_sense_valid(Scsi_Cmnd * SCpnt)
+{
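+ /*
+ * Byte 0 of the sense buffer is the response code; 0x70 (current
+ * error) and 0x71 (deferred error) are the formats we understand,
+ * so masking with 0x70 and shifting must yield 7 for valid data.
+ */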
+ if (((SCpnt->sense_buffer[0] & 0x70) >> 4) != 7) {
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/*
+ * Function: scsi_eh_retry_command()
+ *
+ * Purpose: Retry the original command
+ *
+ * Returns: SUCCESS - we were able to get the sense data.
+ * FAILED - we were not able to get the sense data.
+ *
+ * Notes: This function will *NOT* return until the command either
+ * times out, or it completes.
+ */
+STATIC int scsi_eh_retry_command(Scsi_Cmnd * SCpnt)
+{
+ memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->request_buffer = SCpnt->buffer;
+ SCpnt->request_bufflen = SCpnt->bufflen;
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->cmd_len = SCpnt->old_cmd_len;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+
+ scsi_send_eh_cmnd(SCpnt, SCpnt->timeout_per_command);
+
+ /*
+ * Hey, we are done. Let's look to see what happened.
+ */
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_request_sense()
+ *
+ * Purpose: Request sense data from a particular target.
+ *
+ * Returns: SUCCESS - we were able to get the sense data.
+ * FAILED - we were not able to get the sense data.
+ *
+ * Notes: Some hosts automatically obtain this information, others
+ * require that we obtain it on our own.
+ *
+ * This function will *NOT* return until the command either
+ * times out, or it completes.
+ */
+STATIC int scsi_request_sense(Scsi_Cmnd * SCpnt)
+{
+ static unsigned char generic_sense[6] =
+ {REQUEST_SENSE, 0, 0, 0, 255, 0};
+ unsigned char scsi_result0[256], *scsi_result = NULL;
+ int saved_result;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ memcpy((void *) SCpnt->cmnd, (void *) generic_sense,
+ sizeof(generic_sense));
+
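+ /* Pre-SCSI-3 devices expect the LUN in bits 5-7 of CDB byte 1. */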
+ if (SCpnt->device->scsi_level <= SCSI_2)
+ SCpnt->cmnd[1] = SCpnt->lun << 5;
+
+ scsi_result = (!SCpnt->host->hostt->unchecked_isa_dma)
+ ? &scsi_result0[0] : kmalloc(512, GFP_ATOMIC | GFP_DMA);
+
+ if (scsi_result == NULL) {
+ printk("cannot allocate scsi_result in scsi_request_sense.\n");
+ return FAILED;
+ }
+ /*
+ * Zero the sense buffer. Some host adapters always automatically request
+ * sense, so it is not a good idea for SCpnt->request_buffer and
+ * SCpnt->sense_buffer point to the same address (DB).
+ * 0 is not a valid sense code.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
+ memset((void *) scsi_result, 0, 256);
+
+ saved_result = SCpnt->result;
+ SCpnt->request_buffer = scsi_result;
+ SCpnt->request_bufflen = 256;
+ SCpnt->use_sg = 0;
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+ SCpnt->sc_data_direction = SCSI_DATA_READ;
+ SCpnt->underflow = 0;
+
+ scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
+
+ /* Last chance to have valid sense data */
+ if (!scsi_sense_valid(SCpnt))
+ memcpy((void *) SCpnt->sense_buffer,
+ SCpnt->request_buffer,
+ sizeof(SCpnt->sense_buffer));
+
+ if (scsi_result != &scsi_result0[0] && scsi_result != NULL)
+ kfree(scsi_result);
+
+ /*
+ * When we eventually call scsi_finish, we really wish to complete
+ * the original request, so let's restore the original data. (DB)
+ */
+ memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->result = saved_result;
+ SCpnt->request_buffer = SCpnt->buffer;
+ SCpnt->request_bufflen = SCpnt->bufflen;
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->cmd_len = SCpnt->old_cmd_len;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+
+ /*
+ * Hey, we are done. Let's look to see what happened.
+ */
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_test_unit_ready()
+ *
+ * Purpose: Run a TEST UNIT READY command to see whether the device is still responding.
+ *
+ */
+STATIC int scsi_test_unit_ready(Scsi_Cmnd * SCpnt)
+{
+ static unsigned char tur_command[6] =
+ {TEST_UNIT_READY, 0, 0, 0, 0, 0};
+
+ memcpy((void *) SCpnt->cmnd, (void *) tur_command,
+ sizeof(tur_command));
+
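+ /* As above: pre-SCSI-3 devices expect the LUN in bits 5-7 of CDB byte 1. */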
+ if (SCpnt->device->scsi_level <= SCSI_2)
+ SCpnt->cmnd[1] = SCpnt->lun << 5;
+
+ /*
+ * Zero the sense buffer. The SCSI spec mandates that any
+ * untransferred sense data should be interpreted as being zero.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
+
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+ SCpnt->use_sg = 0;
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+ SCpnt->underflow = 0;
+ SCpnt->sc_data_direction = SCSI_DATA_NONE;
+
+ scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
+
+ /*
+ * When we eventually call scsi_finish, we really wish to complete
+ * the original request, so let's restore the original data. (DB)
+ */
+ memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->request_buffer = SCpnt->buffer;
+ SCpnt->request_bufflen = SCpnt->bufflen;
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->cmd_len = SCpnt->old_cmd_len;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+
+ /*
+ * Hey, we are done. Let's look to see what happened.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("scsi_test_unit_ready: SCpnt %p eh_state %x\n",
+ SCpnt, SCpnt->eh_state));
+ return SCpnt->eh_state;
+}
+
+/*
+ * This would normally need to get the IO request lock,
+ * but as it doesn't actually touch anything that needs
+ * to be locked we can avoid the lock here..
+ */
+STATIC
+void scsi_sleep_done(struct semaphore *sem)
+{
+#if 0
+ if (sem != NULL) {
+ up(sem);
+ }
+#endif
+}
+
+void scsi_sleep(int timeout)
+{
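+ /*
+ * NB: with the semaphore code compiled out in this port, we never
+ * actually block here - the timer below is armed and then deleted
+ * straight away, so the requested delay is effectively a no-op.
+ */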
+#if 0
+ DECLARE_MUTEX_LOCKED(sem);
+#endif
+ struct timer_list timer;
+
+ init_timer(&timer);
+#if 0
+ timer.data = (unsigned long) &sem;
+#else
+ timer.data = 0xDEADBEEF;
+#endif
+ timer.expires = jiffies + timeout;
+ timer.function = (void (*)(unsigned long)) scsi_sleep_done;
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Sleeping for timer tics %d\n", timeout));
+
+ add_timer(&timer);
+
+#if 0
+ down(&sem);
+#endif
+ del_timer(&timer);
+}
+
+/*
+ * Function: scsi_send_eh_cmnd
+ *
+ * Purpose: Send a command out to a device as part of error recovery.
+ *
+ * Notes: The initialization of the structures is quite a bit different
+ * in this case, and furthermore, there is a different completion
+ * handler.
+ */
+STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout)
+{
+ unsigned long flags;
+ struct Scsi_Host *host;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ host = SCpnt->host;
+
+ retry:
+ /*
+ * We will use a queued command if possible, otherwise we will emulate the
+ * queuing and calling of completion function ourselves.
+ */
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+ if (host->can_queue) {
+#if 0
+ DECLARE_MUTEX_LOCKED(sem);
+#endif
+
+ SCpnt->eh_state = SCSI_STATE_QUEUED;
+
+ scsi_add_timer(SCpnt, timeout, scsi_eh_times_out);
+
+#if 0
+ /*
+ * Set up the semaphore so we wait for the command to complete.
+ */
+ SCpnt->host->eh_action = &sem;
+#endif
+ SCpnt->request.rq_status = RQ_SCSI_BUSY;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ host->hostt->queuecommand(SCpnt, scsi_eh_done);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+#if 0
+ down(&sem);
+#endif
+
+ SCpnt->host->eh_action = NULL;
+
+ /*
+ * See if timeout. If so, tell the host to forget about it.
+ * In other words, we don't want a callback any more.
+ */
+ if (SCpnt->eh_state == SCSI_STATE_TIMEOUT) {
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+ /*
+ * As far as the low level driver is
+ * concerned, this command is still active, so
+ * we must give the low level driver a chance
+ * to abort it. (DB)
+ *
+ * FIXME(eric) - we are not tracking whether we could
+ * abort a timed out command or not. Not sure how
+ * we should treat them differently anyways.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (SCpnt->host->hostt->eh_abort_handler)
+ SCpnt->host->hostt->eh_abort_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ SCpnt->request.rq_status = RQ_SCSI_DONE;
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+
+ SCpnt->eh_state = FAILED;
+ }
+ SCSI_LOG_ERROR_RECOVERY(5, printk("send_eh_cmnd: %p eh_state:%x\n",
+ SCpnt, SCpnt->eh_state));
+ } else {
+ int temp;
+
+ /*
+ * We damn well had better never use this code. There is no timeout
+ * protection here: we could end up waiting in the actual low-level
+ * driver, and we have no way to wake it up.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ temp = host->hostt->command(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ SCpnt->result = temp;
+ /* Fall through to code below to examine status. */
+ SCpnt->eh_state = SUCCESS;
+ }
+
+ /*
+ * Now examine the actual status codes to see whether the command actually
+ * did complete normally.
+ */
+ if (SCpnt->eh_state == SUCCESS) {
+ int ret = scsi_eh_completed_normally(SCpnt);
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("scsi_send_eh_cmnd: scsi_eh_completed_normally %x\n", ret));
+ switch (ret) {
+ case SUCCESS:
+ SCpnt->eh_state = SUCCESS;
+ break;
+ case NEEDS_RETRY:
+ goto retry;
+ case FAILED:
+ default:
+ SCpnt->eh_state = FAILED;
+ break;
+ }
+ } else {
+ SCpnt->eh_state = FAILED;
+ }
+}
+
+/*
+ * Function: scsi_unit_is_ready()
+ *
+ * Purpose: Called after TEST_UNIT_READY is run, to test to see if
+ * the unit responded in a way that indicates it is ready.
+ */
+STATIC int scsi_unit_is_ready(Scsi_Cmnd * SCpnt)
+{
+ if (SCpnt->result) {
+ if (((driver_byte(SCpnt->result) & DRIVER_SENSE) ||
+ (status_byte(SCpnt->result) & CHECK_CONDITION)) &&
+ ((SCpnt->sense_buffer[0] & 0x70) >> 4) == 7) {
+ if (((SCpnt->sense_buffer[2] & 0xf) != NOT_READY) &&
+ ((SCpnt->sense_buffer[2] & 0xf) != UNIT_ATTENTION) &&
+ ((SCpnt->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) {
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+/*
+ * Function: scsi_eh_finish_command
+ *
+ * Purpose: Handle a command that we are finished with WRT error handling.
+ *
+ * Arguments: SClist - pointer to list into which we are putting completed commands.
+ * SCpnt - command that is completing
+ *
+ * Notes: We don't want to use the normal command completion while we
+ * are still handling errors - it may cause other commands to be queued,
+ * and that would disturb what we are doing. Thus we really want to keep
+ * a list of pending commands for final completion, and once we
+ * are ready to leave error handling we handle completion for real.
+ */
+STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt)
+{
+ SCpnt->state = SCSI_STATE_BHQUEUE;
+ SCpnt->bh_next = *SClist;
+ /*
+ * Set this back so that the upper level can correctly free up
+ * things.
+ */
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+ *SClist = SCpnt;
+}
+
+/*
+ * Function: scsi_try_to_abort_command
+ *
+ * Purpose: Ask host adapter to abort a running command.
+ *
+ * Returns: FAILED Operation failed or not supported.
+ * SUCCESS Succeeded.
+ *
+ * Notes: This function will not return until the user's completion
+ * function has been called. There is no timeout on this
+ * operation. If the author of the low-level driver wishes
+ * this operation to be timed, they can provide this facility
+ * themselves. Helper functions in scsi_error.c can be supplied
+ * to make this easier to do.
+ *
+ * Notes: It may be possible to combine this with all of the reset
+ * handling to eliminate a lot of code duplication. I don't
+ * know what makes more sense at the moment - this is just a
+ * prototype.
+ */
+STATIC int scsi_try_to_abort_command(Scsi_Cmnd * SCpnt, int timeout)
+{
+ int rtn;
+ unsigned long flags;
+
+ SCpnt->eh_state = FAILED; /* Until we come up with something better */
+
+ if (SCpnt->host->hostt->eh_abort_handler == NULL) {
+ return FAILED;
+ }
+ /*
+ * scsi_done was called just after the command timed out and before
+ * we had a chance to process it. (DB)
+ */
+ if (SCpnt->serial_number == 0)
+ return SUCCESS;
+
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return rtn;
+}
+
+/*
+ * Function: scsi_try_bus_device_reset
+ *
+ * Purpose: Ask host adapter to perform a bus device reset for a given
+ * device.
+ *
+ * Returns: FAILED Operation failed or not supported.
+ * SUCCESS Succeeded.
+ *
+ * Notes: There is no timeout for this operation. If this operation is
+ * unreliable for a given host, then the host itself needs to put a
+ * timer on it, and set the host back to a consistent state prior
+ * to returning.
+ */
+STATIC int scsi_try_bus_device_reset(Scsi_Cmnd * SCpnt, int timeout)
+{
+ unsigned long flags;
+ int rtn;
+
+ SCpnt->eh_state = FAILED; /* Until we come up with something better */
+
+ if (SCpnt->host->hostt->eh_device_reset_handler == NULL) {
+ return FAILED;
+ }
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ if (rtn == SUCCESS)
+ SCpnt->eh_state = SUCCESS;
+
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_try_bus_reset
+ *
+ * Purpose: Ask host adapter to perform a bus reset for a host.
+ *
+ * Returns: FAILED Operation failed or not supported.
+ * SUCCESS Succeeded.
+ *
+ * Notes:
+ */
+STATIC int scsi_try_bus_reset(Scsi_Cmnd * SCpnt)
+{
+ unsigned long flags;
+ int rtn;
+
+ SCpnt->eh_state = FAILED; /* Until we come up with something better */
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+ SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+ if (SCpnt->host->hostt->eh_bus_reset_handler == NULL) {
+ return FAILED;
+ }
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ if (rtn == SUCCESS)
+ SCpnt->eh_state = SUCCESS;
+
+ /*
+ * If we had a successful bus reset, mark the command blocks to expect
+ * a condition code of unit attention.
+ */
+ scsi_sleep(BUS_RESET_SETTLE_TIME);
+ if (SCpnt->eh_state == SUCCESS) {
+ Scsi_Device *SDloop;
+ for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
+ if (SCpnt->channel == SDloop->channel) {
+ SDloop->was_reset = 1;
+ SDloop->expecting_cc_ua = 1;
+ }
+ }
+ }
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_try_host_reset
+ *
+ * Purpose: Ask host adapter to reset itself, and the bus.
+ *
+ * Returns: FAILED Operation failed or not supported.
+ * SUCCESS Succeeded.
+ *
+ * Notes:
+ */
+STATIC int scsi_try_host_reset(Scsi_Cmnd * SCpnt)
+{
+ unsigned long flags;
+ int rtn;
+
+ SCpnt->eh_state = FAILED; /* Until we come up with something better */
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+ SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+ if (SCpnt->host->hostt->eh_host_reset_handler == NULL) {
+ return FAILED;
+ }
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ if (rtn == SUCCESS)
+ SCpnt->eh_state = SUCCESS;
+
+ /*
+ * If we had a successful host reset, mark the command blocks to expect
+ * a condition code of unit attention.
+ */
+ scsi_sleep(HOST_RESET_SETTLE_TIME);
+ if (SCpnt->eh_state == SUCCESS) {
+ Scsi_Device *SDloop;
+ for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
+ SDloop->was_reset = 1;
+ SDloop->expecting_cc_ua = 1;
+ }
+ }
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_decide_disposition
+ *
+ * Purpose: Examine a command block that has come back from the low-level
+ * and figure out what to do next.
+ *
+ * Returns: SUCCESS - pass on to upper level.
+ * FAILED - pass on to error handler thread.
+ * RETRY - command should be retried.
+ * SOFTERR - command succeeded, but we need to log
+ * a soft error.
+ *
+ * Notes: This is *ONLY* called when we are examining the status
+ * after sending out the actual data command. Any commands
+ * that are queued for error recovery (i.e. TEST_UNIT_READY)
+ * do *NOT* come through here.
+ *
+ * NOTE - When this routine returns FAILED, it means the error
+ * handler thread is woken. In cases where the error code
+ * indicates an error that doesn't require the error handler
+ * thread (i.e. we don't need to abort/reset), then this function
+ * should return SUCCESS.
+ */
+int scsi_decide_disposition(Scsi_Cmnd * SCpnt)
+{
+ int rtn;
+
+ /*
+ * If the device is offline, then we clearly just pass the result back
+ * up to the top level.
+ */
+ if (SCpnt->device->online == FALSE) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: device offline - report as SUCCESS\n"));
+ return SUCCESS;
+ }
+ /*
+ * First check the host byte, to see if there is anything in there
+ * that would indicate what we need to do.
+ */
+
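+ /*
+ * The 32-bit result word packs driver, host, message and SCSI status
+ * fields; the driver_byte()/host_byte()/msg_byte()/status_byte()
+ * macros extract the individual fields.
+ */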
+ switch (host_byte(SCpnt->result)) {
+ case DID_PASSTHROUGH:
+ /*
+ * No matter what, pass this through to the upper layer.
+ * Nuke this special code so that it looks like we are saying
+ * DID_OK.
+ */
+ SCpnt->result &= 0xff00ffff;
+ return SUCCESS;
+ case DID_OK:
+ /*
+ * Looks good. Drop through, and check the next byte.
+ */
+ break;
+ case DID_NO_CONNECT:
+ case DID_BAD_TARGET:
+ case DID_ABORT:
+ /*
+ * Note - this means that we just report the status back to the
+ * top level driver, not that we actually think that it indicates
+ * success.
+ */
+ return SUCCESS;
+ /*
+ * When the low level driver returns DID_SOFT_ERROR,
+ * it is responsible for keeping an internal retry counter
+ * in order to avoid endless loops (DB)
+ *
+ * Actually this is a bug in this function: we should
+ * be mindful of the maximum number of retries specified
+ * and not get stuck in a loop.
+ */
+ case DID_SOFT_ERROR:
+ goto maybe_retry;
+
+ case DID_ERROR:
+ if (msg_byte(SCpnt->result) == COMMAND_COMPLETE &&
+ status_byte(SCpnt->result) == RESERVATION_CONFLICT)
+ /*
+ * execute reservation conflict processing code
+ * lower down
+ */
+ break;
+ /* FALLTHROUGH */
+
+ case DID_BUS_BUSY:
+ case DID_PARITY:
+ goto maybe_retry;
+ case DID_TIME_OUT:
+ /*
+ * When we scan the bus, we get timeout messages for
+ * these commands if there is no device available.
+ * Other hosts report DID_NO_CONNECT for the same thing.
+ */
+ if ((SCpnt->cmnd[0] == TEST_UNIT_READY ||
+ SCpnt->cmnd[0] == INQUIRY)) {
+ return SUCCESS;
+ } else {
+ return FAILED;
+ }
+ case DID_RESET:
+ /*
+ * In the normal case where we haven't initiated a reset, this is
+ * a failure.
+ */
+ if (SCpnt->flags & IS_RESETTING) {
+ SCpnt->flags &= ~IS_RESETTING;
+ goto maybe_retry;
+ }
+ return SUCCESS;
+ default:
+ return FAILED;
+ }
+
+ /*
+ * Next, check the message byte.
+ */
+ if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
+ return FAILED;
+ }
+ /*
+ * Now, check the status byte to see if this indicates anything special.
+ */
+ switch (status_byte(SCpnt->result)) {
+ case QUEUE_FULL:
+ /*
+ * The case of trying to send too many commands to a tagged queueing
+ * device.
+ */
+ return ADD_TO_MLQUEUE;
+ case GOOD:
+ case COMMAND_TERMINATED:
+ return SUCCESS;
+ case CHECK_CONDITION:
+ rtn = scsi_check_sense(SCpnt);
+ if (rtn == NEEDS_RETRY) {
+ goto maybe_retry;
+ }
+ return rtn;
+ case CONDITION_GOOD:
+ case INTERMEDIATE_GOOD:
+ case INTERMEDIATE_C_GOOD:
+ /*
+ * Who knows? FIXME(eric)
+ */
+ return SUCCESS;
+ case BUSY:
+ goto maybe_retry;
+
+ case RESERVATION_CONFLICT:
+ printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n",
+ SCpnt->host->host_no, SCpnt->channel,
+ SCpnt->device->id, SCpnt->device->lun);
+ return SUCCESS; /* causes immediate I/O error */
+ default:
+ return FAILED;
+ }
+ return FAILED;
+
+ maybe_retry:
+
+ if ((++SCpnt->retries) < SCpnt->allowed) {
+ return NEEDS_RETRY;
+ } else {
+ /*
+ * No more retries - report this one back to upper level.
+ */
+ return SUCCESS;
+ }
+}
+
+/*
+ * Function: scsi_eh_completed_normally
+ *
+ * Purpose: Examine a command block that has come back from the low-level
+ * and figure out what to do next.
+ *
+ * Returns: SUCCESS - pass on to upper level.
+ * FAILED - pass on to error handler thread.
+ * RETRY - command should be retried.
+ * SOFTERR - command succeeded, but we need to log
+ * a soft error.
+ *
+ * Notes: This is *ONLY* called when we are examining the status
+ * of commands queued during error recovery. The main
+ * difference here is that we don't allow for the possibility
+ * of retries here, and we are a lot more restrictive about what
+ * we consider acceptable.
+ */
+STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt)
+{
+ /*
+ * First check the host byte, to see if there is anything in there
+ * that would indicate what we need to do.
+ */
+ if (host_byte(SCpnt->result) == DID_RESET) {
+ if (SCpnt->flags & IS_RESETTING) {
+ /*
+ * OK, this is normal. We don't know whether in fact the
+ * command in question really needs to be rerun or not -
+ * if this was the original data command then the answer is yes,
+ * otherwise we just flag it as success.
+ */
+ SCpnt->flags &= ~IS_RESETTING;
+ return NEEDS_RETRY;
+ }
+ /*
+ * Rats. We are already in the error handler, so we now get to try
+ * and figure out what to do next. If the sense is valid, we have
+ * a pretty good idea of what to do. If not, we mark it as failed.
+ */
+ return scsi_check_sense(SCpnt);
+ }
+ if (host_byte(SCpnt->result) != DID_OK) {
+ return FAILED;
+ }
+ /*
+ * Next, check the message byte.
+ */
+ if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
+ return FAILED;
+ }
+ /*
+ * Now, check the status byte to see if this indicates anything special.
+ */
+ switch (status_byte(SCpnt->result)) {
+ case GOOD:
+ case COMMAND_TERMINATED:
+ return SUCCESS;
+ case CHECK_CONDITION:
+ return scsi_check_sense(SCpnt);
+ case CONDITION_GOOD:
+ case INTERMEDIATE_GOOD:
+ case INTERMEDIATE_C_GOOD:
+ /*
+ * Who knows? FIXME(eric)
+ */
+ return SUCCESS;
+ case BUSY:
+ case QUEUE_FULL:
+ case RESERVATION_CONFLICT:
+ default:
+ return FAILED;
+ }
+ return FAILED;
+}
+
+/*
+ * Function: scsi_check_sense
+ *
+ * Purpose: Examine sense information - give suggestion as to what
+ * we should do with it.
+ */
+STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt)
+{
+ if (!scsi_sense_valid(SCpnt)) {
+ return FAILED;
+ }
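+ /*
+ * Bits 5-7 of sense byte 2 are the filemark/EOM/ILI flags; the low
+ * nibble is the sense key that the switch below dispatches on.
+ */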
+ if (SCpnt->sense_buffer[2] & 0xe0)
+ return SUCCESS;
+
+ switch (SCpnt->sense_buffer[2] & 0xf) {
+ case NO_SENSE:
+ return SUCCESS;
+ case RECOVERED_ERROR:
+ return /* SOFT_ERROR */ SUCCESS;
+
+ case ABORTED_COMMAND:
+ return NEEDS_RETRY;
+ case NOT_READY:
+ case UNIT_ATTENTION:
+ /*
+ * If we are expecting a CC/UA because of a bus reset that we
+ * performed, treat this just as a retry. Otherwise this is
+ * information that we should pass up to the upper-level driver
+ * so that we can deal with it there.
+ */
+ if (SCpnt->device->expecting_cc_ua) {
+ SCpnt->device->expecting_cc_ua = 0;
+ return NEEDS_RETRY;
+ }
+ /*
+ * If the device is in the process of becoming ready, we
+ * should retry.
+ */
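+ /* Bytes 12 and 13 of the sense buffer are the ASC/ASCQ pair;
+ * 0x04/0x01 means "logical unit is in the process of becoming ready". */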
+ if ((SCpnt->sense_buffer[12] == 0x04) &&
+ (SCpnt->sense_buffer[13] == 0x01)) {
+ return NEEDS_RETRY;
+ }
+ return SUCCESS;
+
+ /* these three are not supported */
+ case COPY_ABORTED:
+ case VOLUME_OVERFLOW:
+ case MISCOMPARE:
+ return SUCCESS;
+
+ case MEDIUM_ERROR:
+ return NEEDS_RETRY;
+
+ case ILLEGAL_REQUEST:
+ case BLANK_CHECK:
+ case DATA_PROTECT:
+ case HARDWARE_ERROR:
+ default:
+ return SUCCESS;
+ }
+}
+
+
+/*
+ * Function: scsi_restart_operations
+ *
+ * Purpose: Restart IO operations to the specified host.
+ *
+ * Arguments: host - host that we are restarting
+ *
+ * Lock status: Assumed that locks are not held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: When we entered the error handler, we blocked all further
+ * I/O to this device. We need to 'reverse' this process.
+ */
+STATIC void scsi_restart_operations(struct Scsi_Host *host)
+{
+ Scsi_Device *SDpnt;
+ unsigned long flags;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ /*
+ * Next free up anything directly waiting upon the host. This will be
+ * requests for character device operations, and also for ioctls to queued
+ * block devices.
+ */
+ SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: Waking up host to restart\n"));
+#if 0
+ wake_up(&host->host_wait);
+#endif
+
+ /*
+ * Finally we need to re-initiate requests that may be pending. We will
+ * have had everything blocked while error handling is taking place, and
+ * now that error recovery is done, we will need to ensure that these
+ * requests are started.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ request_queue_t *q;
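+ /*
+ * If the host itself is saturated or blocked there is no point
+ * in scanning the remaining devices, hence the break below.
+ */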
+ if ((host->can_queue > 0 && (host->host_busy >= host->can_queue))
+ || (host->host_blocked)
+ || (host->host_self_blocked)
+ || (SDpnt->device_blocked)) {
+ break;
+ }
+ q = &SDpnt->request_queue;
+ q->request_fn(q);
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Function: scsi_unjam_host
+ *
+ * Purpose: Attempt to fix a host which has a command that failed for
+ * some reason.
+ *
+ * Arguments: host - host that needs unjamming.
+ *
+ * Returns: Nothing
+ *
+ * Notes: When we come in here, we *know* that all commands on the
+ * bus have either completed, failed or timed out. We also
+ * know that no further commands are being sent to the host,
+ * so things are relatively quiet and we have freedom to
+ * fiddle with things as we wish.
+ *
+ * Additional note: This is only the *default* implementation. It is possible
+ * for individual drivers to supply their own version of this
+ * function, and if the maintainer wishes to do this, it is
+ * strongly suggested that this function be taken as a template
+ * and modified. This function was designed to correctly handle
+ * problems for about 95% of the different cases out there, and
+ * it should always provide at least a reasonable amount of error
+ * recovery.
+ *
+ * Note3: Any command marked 'FAILED' or 'TIMEOUT' must eventually
+ * have scsi_finish_command() called for it. We do all of
+ * the retry stuff here, so when we restart the host after we
+ * return it should have an empty queue.
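+ *
+ * The default strategy below escalates in stages: request sense
+ * data, try to abort timed-out commands, then a bus device reset,
+ * then a bus reset, then a host reset, and as a last resort take
+ * the offending devices offline.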
+ */
+STATIC int scsi_unjam_host(struct Scsi_Host *host)
+{
+ int devices_failed;
+ int numfailed;
+ int ourrtn;
+ int rtn = FALSE;
+ int result;
+ Scsi_Cmnd *SCloop;
+ Scsi_Cmnd *SCpnt;
+ Scsi_Device *SDpnt;
+ Scsi_Device *SDloop;
+ Scsi_Cmnd *SCdone;
+ int timed_out;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCdone = NULL;
+
+ /*
+ * First, protect against any sort of race condition. If any of the outstanding
+ * commands are in states that indicate that we are not yet blocked (i.e. we are
+ * not in a quiet state) then we got woken up in error. If we ever end up here,
+ * we need to re-examine some of the assumptions.
+ */
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state == SCSI_STATE_FAILED
+ || SCpnt->state == SCSI_STATE_TIMEOUT
+ || SCpnt->state == SCSI_STATE_INITIALIZING
+ || SCpnt->state == SCSI_STATE_UNUSED) {
+ continue;
+ }
+ /*
+ * Rats. Something is still floating around out there. This could
+ * be because the upper level drivers are still frobbing
+ * commands that might have succeeded. There are two outcomes. One is that
+ * the command block will eventually be freed, and the other one is that
+ * the command will be queued and will be finished along the way.
+ */
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));
+
+/*
+ * panic("SCSI Error handler woken too early\n");
+ *
+ * This is no longer a problem, since now the code cares only about
+ * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED.
+ * Other states are useful only to release active commands when devices are
+ * set offline. If (host->host_active == host->host_busy) we can safely assume
+ * that there are no commands in a state other than TIMEOUT or FAILED. (DB)
+ *
+ * FIXME:
+ * It is not easy to correctly release commands according to their state when
+ * devices are set offline and the state is neither TIMEOUT nor FAILED.
+ * When a device is set offline, we can be left with a command with
+ * rq_status=RQ_SCSI_BUSY, owner=SCSI_OWNER_HIGHLEVEL,
+ * state=SCSI_STATE_INITIALIZING, and then the driver module cannot be released.
+ * (DB, 17 May 1998)
+ */
+ }
+ }
+
+ /*
+ * Next, see if we need to request sense information. if so,
+ * then get it now, so we have a better idea of what to do.
+ * FIXME(eric) this has the unfortunate side effect that if a host
+ * adapter does not automatically request sense information, we end
+ * up shutting it down before we request it. All hosts should be doing this
+ * anyway, so for now all I have to say is tough noogies if you end up in here.
+ * On second thought, this is probably a good idea. We *really* want to give
+ * authors an incentive to automatically request this.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we need to request sense\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt)) {
+ continue;
+ }
+ SCSI_LOG_ERROR_RECOVERY(2, printk("scsi_unjam_host: Requesting sense for %d\n",
+ SCpnt->target));
+ rtn = scsi_request_sense(SCpnt);
+ if (rtn != SUCCESS) {
+ continue;
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Sense requested for %p - result %x\n",
+ SCpnt, SCpnt->result));
+ SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", SCpnt));
+
+ result = scsi_decide_disposition(SCpnt);
+
+ /*
+ * If the result was normal, then just pass it along to the
+ * upper level.
+ */
+ if (result == SUCCESS) {
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCpnt);
+ }
+ if (result != NEEDS_RETRY) {
+ continue;
+ }
+ /*
+ * We only come in here if we want to retry a
+ * command. The test to see whether the command
+ * should be retried should be keeping track of the
+ * number of tries, so we don't end up looping, of
+ * course.
+ */
+ SCpnt->state = NEEDS_RETRY;
+ rtn = scsi_eh_retry_command(SCpnt);
+ if (rtn != SUCCESS) {
+ continue;
+ }
+ /*
+ * We eventually hand this one back to the top level.
+ */
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCpnt);
+ }
+ }
+
+ /*
+ * Go through the list of commands and figure out where we stand and how bad things
+ * really are.
+ */
+ numfailed = 0;
+ timed_out = 0;
+ devices_failed = 0;
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ unsigned int device_error = 0;
+
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state == SCSI_STATE_FAILED) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d failed\n",
+ SCpnt->target));
+ numfailed++;
+ device_error++;
+ }
+ if (SCpnt->state == SCSI_STATE_TIMEOUT) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d timedout\n",
+ SCpnt->target));
+ timed_out++;
+ device_error++;
+ }
+ }
+ if (device_error > 0) {
+ devices_failed++;
+ }
+ }
+
+ SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d+%d commands on %d devices require eh work\n",
+ numfailed, timed_out, devices_failed));
+
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * Next, try and see whether or not it makes sense to try and abort
+ * the running command. This only works out to be the case if we have
+ * one command that has timed out. If the command simply failed, it
+ * makes no sense to try and abort the command, since as far as the
+ * host adapter is concerned, it isn't running.
+ */
+
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try abort\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT);
+ if (rtn == SUCCESS) {
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * If we have corrected all of the problems, then we are done.
+ */
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * Either the abort wasn't appropriate, or it didn't succeed.
+ * Now try a bus device reset. Still, look to see whether we have
+ * multiple devices that are jammed or not - if we have multiple devices,
+ * it makes no sense to try BUS_DEVICE_RESET - we really would need
+ * to try a BUS_RESET instead.
+ *
+ * Does this make sense - should we try BDR on each device individually?
+ * Yes, definitely.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try BDR\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state == SCSI_STATE_FAILED
+ || SCloop->state == SCSI_STATE_TIMEOUT) {
+ break;
+ }
+ }
+
+ if (SCloop == NULL) {
+ continue;
+ }
+ /*
+ * OK, we have a device that is having problems. Try and send
+ * a bus device reset to it.
+ *
+ * FIXME(eric) - make sure we handle the case where multiple
+ * commands to the same device have failed. They all must
+ * get properly restarted.
+ */
+ rtn = scsi_try_bus_device_reset(SCloop, RESET_TIMEOUT);
+
+ if (rtn == SUCCESS) {
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * If we ended up here, we have serious problems. The only thing left
+ * to try is a full bus reset. If someone has grabbed the bus and isn't
+ * letting go, then perhaps this will help.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard bus reset\n"));
+
+ /*
+ * We really want to loop over the various channels, and do this on
+ * a channel by channel basis. We should also check to see if any
+ * of the failed commands are on soft_reset devices, and if so, skip
+ * the reset.
+ */
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ next_device:
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state != SCSI_STATE_FAILED
+ && SCpnt->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ /*
+ * We have a failed command. Make sure there are no other failed
+ * commands on the same channel that are timed out and implement a
+ * soft reset.
+ */
+ for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+ for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->channel != SCpnt->channel) {
+ continue;
+ }
+ if (SCloop->state != SCSI_STATE_FAILED
+ && SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ if (SDloop->soft_reset && SCloop->state == SCSI_STATE_TIMEOUT) {
+ /*
+ * If this device uses the soft reset option, and this
+ * is one of the devices acting up, then our only
+ * option is to wait a bit, since the command is
+ * supposedly still running.
+ *
+ * FIXME(eric) - right now we will just end up falling
+ * through to the 'take device offline' case.
+ *
+ * FIXME(eric) - It is possible that the command completed
+ * *after* the error recovery procedure started, and if this
+ * is the case, we are worrying about nothing here.
+ */
+
+ scsi_sleep(1 * HZ);
+ goto next_device;
+ }
+ }
+ }
+
+ /*
+ * We now know that we are able to perform a reset for the
+ * bus that SCpnt points to. There are no soft-reset devices
+ * with outstanding timed out commands.
+ */
+ rtn = scsi_try_bus_reset(SCpnt);
+ if (rtn == SUCCESS) {
+ for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+ for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->channel != SCpnt->channel) {
+ continue;
+ }
+ if (SCloop->state != SCSI_STATE_FAILED
+ && SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ /*
+ * If the bus reset worked, but we are still unable to
+ * talk to the device, take it offline.
+ * FIXME(eric) - is this really the correct thing to do?
+ */
+ if (rtn != SUCCESS) {
+ printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after bus reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+
+ SDloop->online = FALSE;
+ SDloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * If we ended up here, we have serious problems. The only thing left
+ * to try is a full host reset - perhaps the firmware on the device
+ * crashed, or something like that.
+ *
+ * It is assumed that a successful host reset will cause *all* information
+ * about the command to be flushed from both the host adapter *and* the
+ * device.
+ *
+ * FIXME(eric) - it isn't clear that devices that implement the soft reset
+ * option can ever be cleared except via cycling the power. The problem is
+ * that sending the host reset command will cause the host to forget
+ * about the pending command, but the device won't forget. For now, we
+ * skip the host reset option if any of the failed devices are configured
+ * to use the soft reset option.
+ */
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ next_device2:
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state != SCSI_STATE_FAILED
+ && SCpnt->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ if (SDpnt->soft_reset && SCpnt->state == SCSI_STATE_TIMEOUT) {
+ /*
+ * If this device uses the soft reset option, and this
+ * is one of the devices acting up, then our only
+ * option is to wait a bit, since the command is
+ * supposedly still running.
+ *
+ * FIXME(eric) - right now we will just end up falling
+ * through to the 'take device offline' case.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("scsi_unjam_host: Unable to try hard host reset\n"));
+
+ /*
+ * Due to the spinlock, we will never get out of this
+ * loop without a proper wait. (DB)
+ */
+ scsi_sleep(1 * HZ);
+
+ goto next_device2;
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard host reset\n"));
+
+ /*
+ * FIXME(eric) - we need to obtain a valid SCpnt to perform this call.
+ */
+ rtn = scsi_try_host_reset(SCpnt);
+ if (rtn == SUCCESS) {
+ /*
+ * FIXME(eric) we assume that all commands are flushed from the
+ * controller. We should get a DID_RESET for all of the commands
+ * that were pending. We should ignore these so that we can
+ * guarantee that we are in a consistent state.
+ *
+ * I believe this to be the case right now, but this needs to be
+ * tested.
+ */
+ for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+ for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state != SCSI_STATE_FAILED
+ && SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ if (rtn != SUCCESS) {
+ printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after host reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+ SDloop->online = FALSE;
+ SDloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * If we solved all of the problems, then let's rev up the engines again.
+ */
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * If the HOST RESET failed, then for now we assume that the entire host
+ * adapter is too hosed to be of any use. For our purposes, however, it is
+ * easier to simply take the devices offline that correspond to commands
+ * that failed.
+ */
+ SCSI_LOG_ERROR_RECOVERY(1, printk("scsi_unjam_host: Take device offline\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state == SCSI_STATE_FAILED || SCloop->state == SCSI_STATE_TIMEOUT) {
+ SDloop = SCloop->device;
+ if (SDloop->online == TRUE) {
+ printk(KERN_INFO "scsi: device set offline - command error recover failed: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+ SDloop->online = FALSE;
+ }
+
+ /*
+ * This should pass the failure up to the top level driver, and
+ * it will have to try and do something intelligent with it.
+ */
+ SCloop->host->host_failed--;
+
+ if (SCloop->state == SCSI_STATE_TIMEOUT) {
+ SCloop->result |= (DRIVER_TIMEOUT << 24);
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Finishing command for device %d %x\n",
+ SDloop->id, SCloop->result));
+
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+
+ if (host->host_failed != 0) {
+ panic("scsi_unjam_host: Miscount of number of failed commands.\n");
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Returning\n"));
+
+ ourrtn = FALSE;
+
+ leave:
+
+ /*
+ * We should have a list of commands that we 'finished' during the course of
+ * error recovery. This should be the same as the list of commands that timed out
+ * or failed. We are currently holding these things in a linked list - we didn't
+ * put them in the bottom half queue because we wanted to keep things quiet while
+ * we were working on recovery, and passing them up to the top level could easily
+ * cause the top level to try and queue something else again.
+ *
+ * Start by marking that the host is no longer in error recovery.
+ */
+ host->in_recovery = 0;
+
+ /*
+ * Take the list of commands, and stick them in the bottom half queue.
+ * The current implementation of scsi_done will do this for us - if need
+ * be we can create a special version of this function to do the
+ * same job for us.
+ */
+ for (SCpnt = SCdone; SCpnt != NULL; SCpnt = SCdone) {
+ SCdone = SCpnt->bh_next;
+ SCpnt->bh_next = NULL;
+ /*
+ * Oh, this is a vile hack. scsi_done() expects a timer
+ * to be running on the command. If there isn't, it assumes
+ * that the command has actually timed out, and a timer
+ * handler is running. That may well be how we got into
+ * this fix, but right now things are stable. We add
+ * a timer back again so that we can report completion.
+ * scsi_done() will immediately remove said timer from
+ * the command, and then process it.
+ */
+ scsi_add_timer(SCpnt, 100, scsi_eh_times_out);
+ scsi_done(SCpnt);
+ }
+
+ return (ourrtn);
+}
+
+
+/*
+ * Function: scsi_error_handler
+ *
+ * Purpose: Handle errors/timeouts of scsi commands, try and clean up
+ * and unjam the bus, and restart things.
+ *
+ * Arguments: host - host for which we are running.
+ *
+ * Returns: Never returns.
+ *
+ * Notes: This is always run in the context of a kernel thread. The
+ * idea is that we start this thing up when the kernel starts
+ * up (one per host that we detect), and it immediately goes to
+ * sleep and waits for some event (i.e. failure). When this
+ * takes place, we have the job of trying to unjam the bus
+ * and restarting things.
+ *
+ */
+void scsi_error_handler(void *data)
+{
+ struct Scsi_Host *host = (struct Scsi_Host *) data;
+ int rtn;
+#if 0
+ DECLARE_MUTEX_LOCKED(sem);
+
+ /*
+ * We only listen to signals if the HA was loaded as a module.
+ * If the HA was compiled into the kernel, then we don't listen
+ * to any signals.
+ */
+ if( host->loaded_as_module ) {
+ siginitsetinv(&current->blocked, SHUTDOWN_SIGS);
+ } else {
+ siginitsetinv(&current->blocked, 0);
+ }
+
+ lock_kernel();
+
+ /*
+ * Flush resources
+ */
+
+ daemonize();
+ reparent_to_init();
+
+ /*
+ * Set the name of this process.
+ */
+
+ sprintf(current->comm, "scsi_eh_%d", host->host_no);
+
+ host->eh_wait = &sem;
+#else
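+ /* The semaphore machinery is compiled out in this port; poison the
+ * pointer, presumably so any stray dereference faults loudly. */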
+ host->eh_wait = (void *)0xDEADBEEF;
+#endif
+ host->ehandler = current;
+
+#if 0
+ unlock_kernel();
+#endif
+
+ /*
+ * Wake up the thread that created us.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n", host->eh_notify->count.counter));
+
+#if 0
+ up(host->eh_notify);
+#endif
+
+ while (1) {
+ /*
+ * If we get a signal, it means we are supposed to go
+ * away and die. This typically happens if the user is
+ * trying to unload a module.
+ */
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler sleeping\n"));
+
+#if 0
+ /*
+ * Note - we always use down_interruptible with the semaphore
+ * even if the module was loaded as part of the kernel. The
+ * reason is that down() will cause this thread to be counted
+ * in the load average as a running process, while down_interruptible()
+ * doesn't. Given that we need to allow this
+ * thread to die if the driver was loaded as a module, using
+ * semaphores isn't unreasonable.
+ */
+ down_interruptible(&sem);
+#endif
+ if( host->loaded_as_module ) {
+ if (signal_pending(current))
+ break;
+ }
+
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler waking up\n"));
+
+ host->eh_active = 1;
+
+ /*
+ * We have a host that is failing for some reason. Figure out
+ * what we need to do to get it up and online again (if we can).
+ * If we fail, we end up taking the thing offline.
+ */
+ if (host->hostt->eh_strategy_handler != NULL) {
+ rtn = host->hostt->eh_strategy_handler(host);
+ } else {
+ rtn = scsi_unjam_host(host);
+ }
+
+ host->eh_active = 0;
+
+ /*
+ * Note - if the above fails completely, the action is to take
+ * individual devices offline and flush the queue of any
+ * outstanding requests that may have been pending. When we
+ * restart, we restart any I/O to any other devices on the bus
+ * which are still online.
+ */
+ scsi_restart_operations(host);
+
+ }
+
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler exiting\n"));
+
+ /*
+ * Make sure that nobody tries to wake us up again.
+ */
+ host->eh_wait = NULL;
+
+ /*
+ * Knock this down too. From this point on, the host is flying
+ * without a pilot. If this is because the module is being unloaded,
+ * that's fine. If the user sent a signal to this thing, we are
+ * potentially in real danger.
+ */
+ host->in_recovery = 0;
+ host->eh_active = 0;
+ host->ehandler = NULL;
+
+#if 0
+ /*
+ * If anyone is waiting for us to exit (i.e. someone trying to unload
+ * a driver), then wake up that process to let them know we are on
+ * the way out the door. This may be overkill - I *think* that we
+ * could probably just unload the driver and send the signal, and when
+ * the error handling thread wakes up that it would just exit without
+ * needing to touch any memory associated with the driver itself.
+ */
+ if (host->eh_notify != NULL)
+ up(host->eh_notify);
+#endif
+}
+
+/*
+ * Function: scsi_new_reset
+ *
+ * Purpose: Send requested reset to a bus or device at any phase.
+ *
+ * Arguments: SCpnt - command ptr to send reset with (usually a dummy)
+ * flag - reset type (see scsi.h)
+ *
+ * Returns: SUCCESS/FAILURE.
+ *
+ * Notes: This is used by the SCSI Generic driver to provide
+ * Bus/Device reset capability.
+ */
+int
+scsi_new_reset(Scsi_Cmnd *SCpnt, int flag)
+{
+ int rtn;
+
+ switch(flag) {
+ case SCSI_TRY_RESET_DEVICE:
+ rtn = scsi_try_bus_device_reset(SCpnt, 0);
+ if (rtn == SUCCESS)
+ break;
+ /* FALLTHROUGH */
+ case SCSI_TRY_RESET_BUS:
+ rtn = scsi_try_bus_reset(SCpnt);
+ if (rtn == SUCCESS)
+ break;
+ /* FALLTHROUGH */
+ case SCSI_TRY_RESET_HOST:
+ rtn = scsi_try_host_reset(SCpnt);
+ break;
+ default:
+ rtn = FAILED;
+ }
+
+ return rtn;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_ioctl.c b/xen/drivers/scsi/scsi_ioctl.c
new file mode 100644
index 0000000000..7f1df6e8f1
--- /dev/null
+++ b/xen/drivers/scsi/scsi_ioctl.c
@@ -0,0 +1,538 @@
+/*
+ * Changes:
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 08/23/2000
+ * - get rid of some verify_areas and use __copy*user and __get/put_user
+ * for the ones that remain
+ */
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/page.h>
+
+/* #include <linux/interrupt.h> */
+/* #include <linux/errno.h> */
+/* #include <linux/kernel.h> */
+#include <xeno/sched.h>
+/* #include <linux/mm.h> */
+/* #include <linux/string.h> */
+
+#include <xeno/blk.h>
+#include "scsi.h"
+#include "hosts.h"
+#include <scsi/scsi_ioctl.h>
+
+#define NORMAL_RETRIES 5
+#define IOCTL_NORMAL_TIMEOUT (10 * HZ)
+#define FORMAT_UNIT_TIMEOUT (2 * 60 * 60 * HZ)
+#define START_STOP_TIMEOUT (60 * HZ)
+#define MOVE_MEDIUM_TIMEOUT (5 * 60 * HZ)
+#define READ_ELEMENT_STATUS_TIMEOUT (5 * 60 * HZ)
+#define READ_DEFECT_DATA_TIMEOUT (60 * HZ ) /* ZIP-250 on parallel port takes as long! */
+
+#define MAX_BUF PAGE_SIZE
+
+/*
+ * If we are told to probe a host, we will return 0 if the host is not
+ * present, 1 if the host is present, and will return an identifying
+ * string at *arg, if arg is non-null, filling to the length stored at
+ * (int *) arg
+ */
+
+static int ioctl_probe(struct Scsi_Host *host, void *buffer)
+{
+ unsigned int len, slen;
+ const char *string;
+ int temp = host->hostt->present;
+
+ if (temp && buffer) {
+ if (get_user(len, (unsigned int *) buffer))
+ return -EFAULT;
+
+ if (host->hostt->info)
+ string = host->hostt->info(host);
+ else
+ string = host->hostt->name;
+ if (string) {
+ slen = strlen(string);
+ if (len > slen)
+ len = slen + 1;
+ if (copy_to_user(buffer, string, len))
+ return -EFAULT;
+ }
+ }
+ return temp;
+}
+
+/*
+ * The SCSI_IOCTL_SEND_COMMAND ioctl sends a command out to the SCSI host.
+ * The IOCTL_NORMAL_TIMEOUT and NORMAL_RETRIES variables are used.
+ *
+ * dev is the SCSI device struct ptr, *(int *) arg is the length of the
+ * input data, if any, not including the command string & counts,
+ * *((int *)arg + 1) is the output buffer size in bytes.
+ *
+ * *(char *) ((int *) arg)[2] the actual command byte.
+ *
+ * Note that if more than MAX_BUF bytes are requested to be transferred,
+ * the ioctl will fail with error EINVAL. MAX_BUF can be increased in
+ * the future by increasing the size that scsi_malloc will accept.
+ *
+ * This size *does not* include the initial lengths that were passed.
+ *
+ * The SCSI command is read from the memory location immediately after the
+ * length words, and the input data is right after the command. The SCSI
+ * routines know the command size based on the opcode decode.
+ *
+ * The output area is then filled in starting from the command byte.
+ */
+
+static int ioctl_internal_command(Scsi_Device * dev, char *cmd,
+ int timeout, int retries)
+{
+ int result;
+ Scsi_Request *SRpnt;
+ Scsi_Device *SDpnt;
+
+
+ SCSI_LOG_IOCTL(1, printk("Trying ioctl with scsi command %d\n", cmd[0]));
+ if (NULL == (SRpnt = scsi_allocate_request(dev))) {
+ printk("SCSI internal ioctl failed, no memory\n");
+ return -ENOMEM;
+ }
+
+ SRpnt->sr_data_direction = SCSI_DATA_NONE;
+ scsi_wait_req(SRpnt, cmd, NULL, 0, timeout, retries);
+
+ SCSI_LOG_IOCTL(2, printk("Ioctl returned 0x%x\n", SRpnt->sr_result));
+
+ if (driver_byte(SRpnt->sr_result) != 0)
+ switch (SRpnt->sr_sense_buffer[2] & 0xf) {
+ case ILLEGAL_REQUEST:
+ if (cmd[0] == ALLOW_MEDIUM_REMOVAL)
+ dev->lockable = 0;
+ else
+ printk("SCSI device (ioctl) reports ILLEGAL REQUEST.\n");
+ break;
+ case NOT_READY: /* This happens if there is no disc in the drive */
+ if (dev->removable && (cmd[0] != TEST_UNIT_READY)) {
+ printk(KERN_INFO "Device not ready. Make sure there is a disc in the drive.\n");
+ break;
+ }
+ case UNIT_ATTENTION:
+ if (dev->removable) {
+ dev->changed = 1;
+ SRpnt->sr_result = 0; /* This is no longer considered an error */
+ /* gag this error, VFS will log it anyway /axboe */
+ /* printk(KERN_INFO "Disc change detected.\n"); */
+ break;
+ }
+ default: /* Fall through for non-removable media */
+ printk("SCSI error: host %d id %d lun %d return code = %x\n",
+ dev->host->host_no,
+ dev->id,
+ dev->lun,
+ SRpnt->sr_result);
+ printk("\tSense class %x, sense error %x, extended sense %x\n",
+ sense_class(SRpnt->sr_sense_buffer[0]),
+ sense_error(SRpnt->sr_sense_buffer[0]),
+ SRpnt->sr_sense_buffer[2] & 0xf);
+
+ }
+
+ result = SRpnt->sr_result;
+
+ SCSI_LOG_IOCTL(2, printk("IOCTL Releasing command\n"));
+ SDpnt = SRpnt->sr_device;
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+ return result;
+}
+
+/*
+ * This interface is deprecated - users should use the SCSI generic (sg)
+ * interface instead, as this is a more flexible approach to performing
+ * generic SCSI commands on a device.
+ *
+ * The structure that we are passed should look like:
+ *
+ * struct sdata {
+ * unsigned int inlen; [i] Length of data to be written to device
+ * unsigned int outlen; [i] Length of data to be read from device
+ * unsigned char cmd[x]; [i] SCSI command (6 <= x <= 12).
+ * [o] Data read from device starts here.
+ * [o] On error, sense buffer starts here.
+ * unsigned char wdata[y]; [i] Data written to device starts here.
+ * };
+ * Notes:
+ * - The SCSI command length is determined by examining the 1st byte
+ * of the given command. There is no way to override this.
+ * - Data transfers are limited to PAGE_SIZE (4K on i386, 8K on alpha).
+ * - The length (x + y) must be at least OMAX_SB_LEN bytes long to
+ * accommodate the sense buffer when an error occurs.
+ * The sense buffer is truncated to OMAX_SB_LEN (16) bytes so that
+ * old code will not be surprised.
+ * - If a Unix error occurs (e.g. ENOMEM) then the user will receive
+ * a negative return and the Unix error code in 'errno'.
+ * If the SCSI command succeeds then 0 is returned.
+ * Positive numbers returned are the compacted SCSI error codes (4
+ * bytes in one int) where the lowest byte is the SCSI status.
+ * See the drivers/scsi/scsi.h file for more information on this.
+ *
+ */
+#define OMAX_SB_LEN 16 /* Old sense buffer length */
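+
+/*
+ * Illustrative user-space sketch (not part of this driver; the file
+ * descriptor and headers are assumptions for the example): issuing a
+ * 6-byte INQUIRY through this interface.
+ *
+ *     #include <sys/ioctl.h>
+ *     #include <string.h>
+ *
+ *     struct { unsigned int inlen, outlen; unsigned char data[96]; } s;
+ *     unsigned char inq[6] = { 0x12, 0, 0, 0, 96, 0 };   INQUIRY
+ *
+ *     s.inlen  = 0;             no data is written to the device
+ *     s.outlen = 96;            expect up to 96 bytes back
+ *     memcpy(s.data, inq, 6);   command bytes precede the data area
+ *     if (ioctl(fd, SCSI_IOCTL_SEND_COMMAND, &s) == 0)
+ *             ...               s.data now holds the INQUIRY response
+ */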
+
+int scsi_ioctl_send_command(Scsi_Device * dev, Scsi_Ioctl_Command * sic)
+{
+ char *buf;
+ unsigned char cmd[MAX_COMMAND_SIZE];
+ char *cmd_in;
+ Scsi_Request *SRpnt;
+ Scsi_Device *SDpnt;
+ unsigned char opcode;
+ unsigned int inlen, outlen, cmdlen;
+ unsigned int needed, buf_needed;
+ int timeout, retries, result;
+ int data_direction;
+
+ if (!sic)
+ return -EINVAL;
+ /*
+ * Verify that we can read at least this much.
+ */
+ if (verify_area(VERIFY_READ, sic, sizeof(Scsi_Ioctl_Command)))
+ return -EFAULT;
+
+ if(__get_user(inlen, &sic->inlen))
+ return -EFAULT;
+
+ if(__get_user(outlen, &sic->outlen))
+ return -EFAULT;
+
+ /*
+ * We do not transfer more than MAX_BUF with this interface.
+ * If the user needs to transfer more data than this, they
+ * should use scsi_generics (sg) instead.
+ */
+ if (inlen > MAX_BUF)
+ return -EINVAL;
+ if (outlen > MAX_BUF)
+ return -EINVAL;
+
+ cmd_in = sic->data;
+ if(get_user(opcode, cmd_in))
+ return -EFAULT;
+
+ needed = buf_needed = (inlen > outlen ? inlen : outlen);
+ if (buf_needed) {
+ buf_needed = (buf_needed + 511) & ~511;
+ if (buf_needed > MAX_BUF)
+ buf_needed = MAX_BUF;
+ buf = (char *) scsi_malloc(buf_needed);
+ if (!buf)
+ return -ENOMEM;
+ memset(buf, 0, buf_needed);
+ if( inlen == 0 ) {
+ data_direction = SCSI_DATA_READ;
+ } else if (outlen == 0 ) {
+ data_direction = SCSI_DATA_WRITE;
+ } else {
+ /*
+ * Can this ever happen?
+ */
+ data_direction = SCSI_DATA_UNKNOWN;
+ }
+
+ } else {
+ buf = NULL;
+ data_direction = SCSI_DATA_NONE;
+ }
+
+ /*
+ * Obtain the command from the user's address space.
+ */
+ cmdlen = COMMAND_SIZE(opcode);
+
+ result = -EFAULT;
+
+ if (verify_area(VERIFY_READ, cmd_in, cmdlen + inlen))
+ goto error;
+
+ if(__copy_from_user(cmd, cmd_in, cmdlen))
+ goto error;
+
+ /*
+ * Obtain the data to be sent to the device (if any).
+ */
+
+ if(copy_from_user(buf, cmd_in + cmdlen, inlen))
+ goto error;
+
+ /*
+ * Set the lun field to the correct value.
+ */
+ if (dev->scsi_level <= SCSI_2)
+ cmd[1] = (cmd[1] & 0x1f) | (dev->lun << 5);
+
+ switch (opcode) {
+ case FORMAT_UNIT:
+ timeout = FORMAT_UNIT_TIMEOUT;
+ retries = 1;
+ break;
+ case START_STOP:
+ timeout = START_STOP_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ case MOVE_MEDIUM:
+ timeout = MOVE_MEDIUM_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ case READ_ELEMENT_STATUS:
+ timeout = READ_ELEMENT_STATUS_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ case READ_DEFECT_DATA:
+ timeout = READ_DEFECT_DATA_TIMEOUT;
+ retries = 1;
+ break;
+ default:
+ timeout = IOCTL_NORMAL_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ }
+
+#ifndef DEBUG_NO_CMD
+
+ SRpnt = scsi_allocate_request(dev);
+ if( SRpnt == NULL )
+ {
+ result = -EINTR;
+ goto error;
+ }
+
+ SRpnt->sr_data_direction = data_direction;
+ scsi_wait_req(SRpnt, cmd, buf, needed, timeout, retries);
+
+ /*
+ * If there was an error condition, pass the info back to the user.
+ */
+
+ result = SRpnt->sr_result;
+
+ if (SRpnt->sr_result) {
+ int sb_len = sizeof(SRpnt->sr_sense_buffer);
+
+ sb_len = (sb_len > OMAX_SB_LEN) ? OMAX_SB_LEN : sb_len;
+ if (copy_to_user(cmd_in, SRpnt->sr_sense_buffer, sb_len))
+ result = -EFAULT;
+ } else {
+ if (copy_to_user(cmd_in, buf, outlen))
+ result = -EFAULT;
+ }
+
+ SDpnt = SRpnt->sr_device;
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+error:
+ if (buf)
+ scsi_free(buf, buf_needed);
+
+ return result;
+#else
+ {
+ int i;
+ printk("scsi_ioctl : device %d. command = ", dev->id);
+ for (i = 0; i < cmdlen; ++i)
+ printk("%02x ", cmd[i]);
+ printk("\nbuffer =");
+ for (i = 0; i < 20; ++i)
+ printk("%02x ", buf[i]);
+ printk("\n");
+ printk("inlen = %d, outlen = %d, cmdlen = %d\n",
+ inlen, outlen, cmdlen);
+ printk("buffer = %d, cmd_in = %d\n", buffer, cmd_in);
+ }
+ return 0;
+#endif
+}
+
+/*
+ * The scsi_ioctl_get_pci() function places into arg the value
+ * pci_dev::slot_name (8 characters) for the PCI device (if any).
+ * Returns: 0 on success
+ * -ENXIO if there isn't a PCI device pointer
+ * (could be because the SCSI driver hasn't been
+ * updated yet, or because it isn't a SCSI
+ * device)
+ * any copy_to_user() error on failure there
+ */
+static int
+scsi_ioctl_get_pci(Scsi_Device * dev, void *arg)
+{
+
+ if (!dev->host->pci_dev)
+ return -ENXIO;
+ if(copy_to_user(arg, dev->host->pci_dev->slot_name,
+ sizeof(dev->host->pci_dev->slot_name)))
+ return -EFAULT;
+ return 0;
+}
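+
+/*
+ * Illustrative user-space sketch (fd is an assumption): the buffer
+ * must be at least as large as pci_dev::slot_name (8 bytes), and the
+ * result is not guaranteed to be NUL-terminated:
+ *
+ *     char slot[8];
+ *     if (ioctl(fd, SCSI_IOCTL_GET_PCI, slot) == 0)
+ *             printf("%.8s\n", slot);
+ */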
+
+
+/*
+ * the scsi_ioctl() function differs from most ioctls in that it does
+ * not take a major/minor number as the dev field. Rather, it takes
+ * a pointer to a scsi_devices[] element, a structure.
+ */
+int scsi_ioctl(Scsi_Device * dev, int cmd, void *arg)
+{
+ char scsi_cmd[MAX_COMMAND_SIZE];
+ char cmd_byte1;
+
+ /* No idea how this happens.... */
+ if (!dev)
+ return -ENXIO;
+
+ /*
+ * If we are in the middle of error recovery, don't let anyone
+ * else try and use this device. Also, if error recovery fails, it
+ * may try and take the device offline, in which case all further
+ * access to the device is prohibited.
+ */
+ if (!scsi_block_when_processing_errors(dev)) {
+ return -ENODEV;
+ }
+ cmd_byte1 = (dev->scsi_level <= SCSI_2) ? (dev->lun << 5) : 0;
+
+ switch (cmd) {
+ case SCSI_IOCTL_GET_IDLUN:
+ if (verify_area(VERIFY_WRITE, arg, sizeof(Scsi_Idlun)))
+ return -EFAULT;
+
+ __put_user((dev->id & 0xff)
+ + ((dev->lun & 0xff) << 8)
+ + ((dev->channel & 0xff) << 16)
+ + ((dev->host->host_no & 0xff) << 24),
+ &((Scsi_Idlun *) arg)->dev_id);
+ __put_user(dev->host->unique_id, &((Scsi_Idlun *) arg)->host_unique_id);
+ return 0;
+ case SCSI_IOCTL_GET_BUS_NUMBER:
+ return put_user(dev->host->host_no, (int *) arg);
+ case SCSI_IOCTL_TAGGED_ENABLE:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (!dev->tagged_supported)
+ return -EINVAL;
+ dev->tagged_queue = 1;
+ dev->current_tag = 1;
+ return 0;
+ case SCSI_IOCTL_TAGGED_DISABLE:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (!dev->tagged_supported)
+ return -EINVAL;
+ dev->tagged_queue = 0;
+ dev->current_tag = 0;
+ return 0;
+ case SCSI_IOCTL_PROBE_HOST:
+ return ioctl_probe(dev->host, arg);
+ case SCSI_IOCTL_SEND_COMMAND:
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+ return -EACCES;
+ return scsi_ioctl_send_command((Scsi_Device *) dev,
+ (Scsi_Ioctl_Command *) arg);
+ case SCSI_IOCTL_DOORLOCK:
+ if (!dev->removable || !dev->lockable)
+ return 0;
+ scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = SCSI_REMOVAL_PREVENT;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_DOORUNLOCK:
+ if (!dev->removable || !dev->lockable)
+ return 0;
+ scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = SCSI_REMOVAL_ALLOW;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_TEST_UNIT_READY:
+ scsi_cmd[0] = TEST_UNIT_READY;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = 0;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_START_UNIT:
+ scsi_cmd[0] = START_STOP;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = 1;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ START_STOP_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_STOP_UNIT:
+ scsi_cmd[0] = START_STOP;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = 0;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ START_STOP_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_GET_PCI:
+ return scsi_ioctl_get_pci(dev, arg);
+ default:
+ if (dev->host->hostt->ioctl)
+ return dev->host->hostt->ioctl(dev, cmd, arg);
+ return -EINVAL;
+ }
+ return -EINVAL;
+}
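+
+/*
+ * Illustrative sketch of unpacking SCSI_IOCTL_GET_IDLUN from user
+ * space (the field layout mirrors the packing above):
+ *
+ *     Scsi_Idlun idlun;
+ *     ioctl(fd, SCSI_IOCTL_GET_IDLUN, &idlun);
+ *     int id      =  idlun.dev_id        & 0xff;
+ *     int lun     = (idlun.dev_id >> 8)  & 0xff;
+ *     int channel = (idlun.dev_id >> 16) & 0xff;
+ *     int host    = (idlun.dev_id >> 24) & 0xff;
+ */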
+
+/*
+ * Just like scsi_ioctl, but callable from kernel space: it widens the
+ * address limit (fs) around the call so that the user-copy routines
+ * accept kernel pointers.
+ */
+
+int kernel_scsi_ioctl(Scsi_Device * dev, int cmd, void *arg)
+{
+ mm_segment_t oldfs;
+ int tmp;
+ oldfs = get_fs();
+ set_fs(get_ds());
+ tmp = scsi_ioctl(dev, cmd, arg);
+ set_fs(oldfs);
+ return tmp;
+}
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_lib.c b/xen/drivers/scsi/scsi_lib.c
new file mode 100644
index 0000000000..8c32bf547f
--- /dev/null
+++ b/xen/drivers/scsi/scsi_lib.c
@@ -0,0 +1,1201 @@
+/*
+ * scsi_lib.c Copyright (C) 1999 Eric Youngdale
+ *
+ * SCSI queueing library.
+ * Initial versions: Eric Youngdale (eric@andante.org).
+ * Based upon conversations with large numbers
+ * of people at Linux Expo.
+ */
+
+/*
+ * The fundamental purpose of this file is to contain a library of utility
+ * routines that can be used by low-level drivers. Ultimately the idea
+ * is that there should be a sufficiently rich number of functions that it
+ * would be possible for a driver author to fashion a queueing function for
+ * a low-level driver if they wished. Note however that this file also
+ * contains the "default" versions of these functions, as we don't want to
+ * go through and retrofit queueing functions into all 30 some-odd drivers.
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/* #include <xeno/string.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/ioport.h> */
+/* #include <xeno/kernel.h> */
+/* #include <xeno/stat.h> */
+#include <xeno/blk.h>
+/* #include <xeno/interrupt.h> */
+/* #include <xeno/delay.h> */
+/* #include <xeno/smp_lock.h> */
+/* #include <xeno/completion.h> */
+
+
+#define __KERNEL_SYSCALLS__
+
+/* #include <xeno/unistd.h> */
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+#include <scsi/scsi_ioctl.h>
+
+#define SPECIAL XEN_BLOCK_SPECIAL
+
+/*
+ * This entire source file deals with the new queueing code.
+ */
+
+/*
+ * Function: __scsi_insert_special()
+ *
+ * Purpose: worker for scsi_insert_special_*()
+ *
+ * Arguments: q - request queue where request should be inserted
+ * rq - request to be inserted
+ * data - private data
+ * at_head - insert request at head or tail of queue
+ *
+ * Lock status: Assumed that io_request_lock is not held upon entry.
+ *
+ * Returns: Nothing
+ */
+static void __scsi_insert_special(request_queue_t *q, struct request *rq,
+ void *data, int at_head)
+{
+ unsigned long flags;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ rq->cmd = SPECIAL;
+ rq->special = data;
+ rq->q = NULL;
+ rq->nr_segments = 0;
+ rq->elevator_sequence = 0;
+
+ /*
+ * We have the option of inserting the head or the tail of the queue.
+ * Typically we use the tail for new ioctls and so forth. We use the
+ * head of the queue for things like a QUEUE_FULL message from a
+ * device, or a host that is unable to accept a particular command.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+
+ if (at_head)
+ list_add(&rq->queue, &q->queue_head);
+ else
+ list_add_tail(&rq->queue, &q->queue_head);
+
+ q->request_fn(q);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+
+/*
+ * Function: scsi_insert_special_cmd()
+ *
+ * Purpose: Insert pre-formed command into request queue.
+ *
+ * Arguments: SCpnt - command that is ready to be queued.
+ * at_head - boolean. True if we should insert at head
+ * of queue, false if we should insert at tail.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This function is called from character device and from
+ * ioctl types of functions where the caller knows exactly
+ * what SCSI command needs to be issued. The idea is that
+ * we merely inject the command into the queue (at the head
+ * for now), and then call the queue request function to actually
+ * process it.
+ */
+int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int at_head)
+{
+ request_queue_t *q = &SCpnt->device->request_queue;
+
+ __scsi_insert_special(q, &SCpnt->request, SCpnt, at_head);
+ return 0;
+}
+
+/*
+ * Function: scsi_insert_special_req()
+ *
+ * Purpose: Insert pre-formed request into request queue.
+ *
+ * Arguments: SRpnt - request that is ready to be queued.
+ * at_head - boolean. True if we should insert at head
+ * of queue, false if we should insert at tail.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This function is called from character device and from
+ * ioctl types of functions where the caller knows exactly
+ * what SCSI command needs to be issued. The idea is that
+ * we merely inject the command into the queue (at the head
+ * for now), and then call the queue request function to actually
+ * process it.
+ */
+int scsi_insert_special_req(Scsi_Request * SRpnt, int at_head)
+{
+ request_queue_t *q = &SRpnt->sr_device->request_queue;
+
+ __scsi_insert_special(q, &SRpnt->sr_request, SRpnt, at_head);
+ return 0;
+}
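+
+/*
+ * For illustration, this is in essence how the stock Linux 2.4
+ * mid-layer's scsi_wait_req() path feeds a request through here
+ * (a sketch; the completion plumbing is elided in this tree):
+ *
+ *     DECLARE_COMPLETION(wait);
+ *     SRpnt->sr_request.waiting = &wait;
+ *     scsi_insert_special_req(SRpnt, 0);     queue at the tail
+ *     wait_for_completion(&wait);
+ *     SRpnt->sr_request.waiting = NULL;
+ */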
+
+/*
+ * Function: scsi_init_cmd_errh()
+ *
+ * Purpose: Initialize SCpnt fields related to error handling.
+ *
+ * Arguments: SCpnt - command that is ready to be queued.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This function has the job of initializing a number of
+ * fields related to error handling. Typically this will
+ * be called once for each command, as required.
+ */
+int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt)
+{
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+ SCpnt->reset_chain = NULL;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->flags = 0;
+ SCpnt->retries = 0;
+
+ SCpnt->abort_reason = 0;
+
+ memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+
+ if (SCpnt->cmd_len == 0)
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+
+ /*
+ * We need saved copies of a number of fields - this is because
+ * error handling may need to overwrite these with different values
+ * to run different commands, and once error handling is complete,
+ * we will need to restore these values prior to running the actual
+ * command.
+ */
+ SCpnt->old_use_sg = SCpnt->use_sg;
+ SCpnt->old_cmd_len = SCpnt->cmd_len;
+ SCpnt->sc_old_data_direction = SCpnt->sc_data_direction;
+ SCpnt->old_underflow = SCpnt->underflow;
+ memcpy((void *) SCpnt->data_cmnd,
+ (const void *) SCpnt->cmnd, sizeof(SCpnt->cmnd));
+ SCpnt->buffer = SCpnt->request_buffer;
+ SCpnt->bufflen = SCpnt->request_bufflen;
+
+ SCpnt->reset_chain = NULL;
+
+ SCpnt->internal_timeout = NORMAL_TIMEOUT;
+ SCpnt->abort_reason = 0;
+
+ return 1;
+}
+
+/*
+ * Function: scsi_queue_next_request()
+ *
+ * Purpose: Handle post-processing of completed commands.
+ *
+ * Arguments: SCpnt - command that may need to be requeued.
+ *
+ * Returns: Nothing
+ *
+ * Notes: After command completion, there may be blocks left
+ * over which weren't finished by the previous command;
+ * this can be for a number of reasons - the main one is
+ * that a medium error occurred, and the sectors after
+ * the bad block need to be re-read.
+ *
+ * If SCpnt is NULL, it means that the previous command
+ * was completely finished, and we should simply start
+ * a new command, if possible.
+ *
+ * This is where a lot of special case code has begun to
+ * accumulate. It doesn't really affect readability or
+ * anything, but it might be considered architecturally
+ * inelegant. If more of these special cases start to
+ * accumulate, I am thinking along the lines of implementing
+ * an atexit() like technology that gets run when commands
+ * complete. I am not convinced that it is worth the
+ * added overhead, however. Right now as things stand,
+ * there are simple conditional checks, and most hosts
+ * would skip past.
+ *
+ * Another possible solution would be to tailor different
+ * handler functions, sort of like what we did in scsi_merge.c.
+ * This is probably a better solution, but the number of different
+ * permutations grows as 2**N, and if too many more special cases
+ * get added, we start to get screwed.
+ */
+void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt)
+{
+ int all_clear;
+ unsigned long flags;
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (SCpnt != NULL) {
+
+ /*
+ * For some reason, we are not done with this request.
+ * This happens for I/O errors in the middle of the request,
+ * in which case we need to request the blocks that come after
+ * the bad sector.
+ */
+ SCpnt->request.special = (void *) SCpnt;
+ list_add(&SCpnt->request.queue, &q->queue_head);
+ }
+
+ /*
+ * Just hit the requeue function for the queue.
+ */
+ q->request_fn(q);
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ SHpnt = SDpnt->host;
+
+ /*
+ * If this is a single-lun device, and we are currently finished
+ * with this device, then see if we need to get another device
+ * started. FIXME(eric) - if this function gets too cluttered
+ * with special case code, then spin off separate versions and
+ * use function pointers to pick the right one.
+ */
+ if (SDpnt->single_lun
+ && list_empty(&q->queue_head)
+ && SDpnt->device_busy == 0) {
+ request_queue_t *q;
+
+ for (SDpnt = SHpnt->host_queue;
+ SDpnt;
+ SDpnt = SDpnt->next) {
+ if (((SHpnt->can_queue > 0)
+ && (SHpnt->host_busy >= SHpnt->can_queue))
+ || (SHpnt->host_blocked)
+ || (SHpnt->host_self_blocked)
+ || (SDpnt->device_blocked)) {
+ break;
+ }
+ q = &SDpnt->request_queue;
+ q->request_fn(q);
+ }
+ }
+
+ /*
+ * Now see whether there are other devices on the bus which
+ * might be starved. If so, hit the request function. If we
+ * don't find any, then it is safe to reset the flag. If we
+ * find any device that is starved, it isn't safe to reset the
+ * flag as the queue function releases the lock and thus some
+ * other device might have become starved along the way.
+ */
+ all_clear = 1;
+ if (SHpnt->some_device_starved) {
+ for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ request_queue_t *q;
+ if ((SHpnt->can_queue > 0 &&(SHpnt->host_busy >= SHpnt->can_queue))
+ || (SHpnt->host_blocked)
+ || (SHpnt->host_self_blocked)) {
+ break;
+ }
+ if (SDpnt->device_blocked || !SDpnt->starved) {
+ continue;
+ }
+ q = &SDpnt->request_queue;
+ q->request_fn(q);
+ all_clear = 0;
+ }
+ if (SDpnt == NULL && all_clear) {
+ SHpnt->some_device_starved = 0;
+ }
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Function: __scsi_end_request()
+ *
+ * Purpose: Post-processing of completed commands called from interrupt
+ * handler or a bottom-half handler.
+ *
+ * Arguments: SCpnt - command that is complete.
+ * uptodate - 1 if I/O indicates success, 0 for I/O error.
+ * sectors - number of sectors we want to mark.
+ * requeue - if set, requeue any leftover blocks of the request.
+ * frequeue - if set, call the queue request function once the
+ * command block has been released.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: The command block if leftover blocks remain, or NULL once
+ * the request has been fully completed and released.
+ *
+ * Notes: This is called for block device requests in order to
+ * mark some number of sectors as complete.
+ *
+ * We are guaranteeing that the request queue will be goosed
+ * at some point during this call.
+ */
+static Scsi_Cmnd *__scsi_end_request(Scsi_Cmnd * SCpnt,
+ int uptodate,
+ int sectors,
+ int requeue,
+ int frequeue)
+{
+ struct request *req;
+ struct buffer_head *bh;
+ Scsi_Device * SDpnt;
+ int nsect;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ req = &SCpnt->request;
+ req->errors = 0;
+ if (!uptodate) {
+ printk(" I/O error: dev %s, sector %lu\n",
+ kdevname(req->rq_dev), req->sector);
+ }
+ do {
+ if ((bh = req->bh) != NULL) {
+ nsect = bh->b_size >> 9;
+ blk_finished_io(nsect);
+ req->bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ sectors -= nsect;
+ bh->b_end_io(bh, uptodate);
+ if ((bh = req->bh) != NULL) {
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector += nsect;
+ req->nr_sectors -= nsect;
+
+ req->current_nr_sectors = bh->b_size >> 9;
+ if (req->nr_sectors < req->current_nr_sectors) {
+ req->nr_sectors = req->current_nr_sectors;
+ printk("scsi_end_request: buffer-list destroyed\n");
+ }
+ }
+ }
+ } while (sectors && bh);
+
+ /*
+ * If there are blocks left over at the end, set up the command
+ * to queue the remainder of them.
+ */
+ if (req->bh) {
+ request_queue_t *q;
+
+ if( !requeue )
+ {
+ return SCpnt;
+ }
+
+ q = &SCpnt->device->request_queue;
+
+ req->buffer = bh->b_data;
+ /*
+ * Bleah. Leftovers again. Stick the leftovers in
+ * the front of the queue, and goose the queue again.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ return SCpnt;
+ }
+#if 0
+ /*
+ * This request is done. If there is someone blocked waiting for this
+ * request, wake them up. Typically used to wake up processes trying
+ * to swap a page into memory.
+ */
+ if (req->waiting != NULL) {
+ complete(req->waiting);
+ }
+#endif
+ req_finished_io(req);
+ add_blkdev_randomness(MAJOR(req->rq_dev));
+
+ SDpnt = SCpnt->device;
+
+ /*
+ * This will goose the queue request function at the end, so we don't
+ * need to worry about launching another command.
+ */
+ __scsi_release_command(SCpnt);
+
+ if( frequeue ) {
+ request_queue_t *q;
+
+ q = &SDpnt->request_queue;
+ scsi_queue_next_request(q, NULL);
+ }
+ return NULL;
+}
+
+/*
+ * Function: scsi_end_request()
+ *
+ * Purpose: Post-processing of completed commands called from interrupt
+ * handler or a bottom-half handler.
+ *
+ * Arguments: SCpnt - command that is complete.
+ * uptodate - 1 if I/O indicates success, 0 for I/O error.
+ * sectors - number of sectors we want to mark.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: The command block if leftover blocks remain (they are
+ * requeued automatically), or NULL when the request is complete.
+ *
+ * Notes: This is called for block device requests in order to
+ * mark some number of sectors as complete.
+ *
+ * We are guaranteeing that the request queue will be goosed
+ * at some point during this call.
+ */
+Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, int uptodate, int sectors)
+{
+ return __scsi_end_request(SCpnt, uptodate, sectors, 1, 1);
+}
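+
+/*
+ * Typical use from an upper-level driver's completion path (sketch):
+ *
+ *     SCpnt = scsi_end_request(SCpnt, 1, good_sectors);
+ *     if (SCpnt == NULL)
+ *             return;        request fully completed and released
+ *     ...                    SCpnt still describes the unfinished
+ *                            tail, which has been requeued for us
+ */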
+
+/*
+ * Function: scsi_release_buffers()
+ *
+ * Purpose: Completion processing for block device I/O requests.
+ *
+ * Arguments: SCpnt - command that we are bailing.
+ *
+ * Lock status: Assumed that no lock is held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: In the event that an upper level driver rejects a
+ * command, we must release resources allocated during
+ * the __init_io() function. Primarily this would involve
+ * the scatter-gather table, and potentially any bounce
+ * buffers.
+ */
+static void scsi_release_buffers(Scsi_Cmnd * SCpnt)
+{
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ /*
+ * Free up any indirection buffers we allocated for DMA purposes.
+ */
+ if (SCpnt->use_sg) {
+ struct scatterlist *sgpnt;
+ void **bbpnt;
+ int i;
+
+ sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+ bbpnt = SCpnt->bounce_buffers;
+
+ if (bbpnt) {
+ for (i = 0; i < SCpnt->use_sg; i++) {
+ if (bbpnt[i])
+ scsi_free(sgpnt[i].address, sgpnt[i].length);
+ }
+ }
+ scsi_free(SCpnt->request_buffer, SCpnt->sglist_len);
+ } else {
+ if (SCpnt->request_buffer != SCpnt->request.buffer) {
+ scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen);
+ }
+ }
+
+ /*
+ * Zero these out. They now point to freed memory, and it is
+ * dangerous to hang onto the pointers.
+ */
+ SCpnt->buffer = NULL;
+ SCpnt->bufflen = 0;
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+}
+
+/*
+ * Function: scsi_io_completion()
+ *
+ * Purpose: Completion processing for block device I/O requests.
+ *
+ * Arguments: SCpnt - command that is finished.
+ *
+ * Lock status: Assumed that no lock is held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This function is matched in terms of capabilities to
+ * the function that created the scatter-gather list.
+ * In other words, if there are no bounce buffers
+ * (the normal case for most drivers), we don't need
+ * the logic to deal with cleaning up afterwards.
+ */
+void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors,
+ int block_sectors)
+{
+ int result = SCpnt->result;
+ int this_count = SCpnt->bufflen >> 9;
+ request_queue_t *q = &SCpnt->device->request_queue;
+
+ /*
+ * We must do one of several things here:
+ *
+ * Call scsi_end_request. This will finish off the specified
+ * number of sectors. If we are done, the command block will
+ * be released, and the queue function will be goosed. If we
+ * are not done, then scsi_end_request will directly goose
+ * the queue.
+ *
+ * We can just use scsi_queue_next_request() here. This
+ * would be used if we just wanted to retry, for example.
+ *
+ */
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ /*
+ * Free up any indirection buffers we allocated for DMA purposes.
+ * For the case of a READ, we need to copy the data out of the
+ * bounce buffer and into the real buffer.
+ */
+ if (SCpnt->use_sg) {
+ struct scatterlist *sgpnt;
+ void **bbpnt;
+ int i;
+
+ sgpnt = (struct scatterlist *) SCpnt->buffer;
+ bbpnt = SCpnt->bounce_buffers;
+
+ if (bbpnt) {
+ for (i = 0; i < SCpnt->use_sg; i++) {
+ if (bbpnt[i]) {
+ if (SCpnt->request.cmd == READ) {
+ memcpy(bbpnt[i],
+ sgpnt[i].address,
+ sgpnt[i].length);
+ }
+ scsi_free(sgpnt[i].address, sgpnt[i].length);
+ }
+ }
+ }
+ scsi_free(SCpnt->buffer, SCpnt->sglist_len);
+ } else {
+ if (SCpnt->buffer != SCpnt->request.buffer) {
+ if (SCpnt->request.cmd == READ) {
+ memcpy(SCpnt->request.buffer, SCpnt->buffer,
+ SCpnt->bufflen);
+ }
+ scsi_free(SCpnt->buffer, SCpnt->bufflen);
+ }
+ }
+
+ /*
+ * Zero these out. They now point to freed memory, and it is
+ * dangerous to hang onto the pointers.
+ */
+ SCpnt->buffer = NULL;
+ SCpnt->bufflen = 0;
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+
+ /*
+ * Next deal with any sectors which we were able to correctly
+ * handle.
+ */
+ if (good_sectors > 0) {
+ SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n",
+ SCpnt->request.nr_sectors,
+ good_sectors));
+ SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg));
+
+ SCpnt->request.errors = 0;
+ /*
+ * If multiple sectors are requested in one buffer, then
+ * they will have been finished off by the first command.
+ * If not, then we have a multi-buffer command.
+ *
+ * If block_sectors != 0, it means we had a medium error
+ * of some sort, and that we want to mark some number of
+ * sectors as not uptodate. Thus we want to inhibit
+ * requeueing right here - we will requeue down below
+ * when we handle the bad sectors.
+ */
+ SCpnt = __scsi_end_request(SCpnt,
+ 1,
+ good_sectors,
+ result == 0,
+ 1);
+
+ /*
+ * If the command completed without error, then either finish off the
+ * rest of the command, or start a new one.
+ */
+ if (result == 0 || SCpnt == NULL ) {
+ return;
+ }
+ }
+ /*
+ * Now, if we were good little boys and girls, Santa left us a request
+ * sense buffer. We can extract information from this, so we
+ * can choose a block to remap, etc.
+ */
+ if (driver_byte(result) != 0) {
+ if (suggestion(result) == SUGGEST_REMAP) {
+#ifdef REMAP
+ /*
+ * Not yet implemented. A read will fail after being remapped,
+ * a write will call the strategy routine again.
+ */
+ if (SCpnt->device->remap) {
+ result = 0;
+ }
+#endif
+ }
+ if ((SCpnt->sense_buffer[0] & 0x7f) == 0x70) {
+ /*
+ * If the device is in the process of becoming ready,
+ * retry.
+ */
+ if (SCpnt->sense_buffer[12] == 0x04 &&
+ SCpnt->sense_buffer[13] == 0x01) {
+ scsi_queue_next_request(q, SCpnt);
+ return;
+ }
+ if ((SCpnt->sense_buffer[2] & 0xf) == UNIT_ATTENTION) {
+ if (SCpnt->device->removable) {
+ /* detected disc change. set a bit
+ * and quietly refuse further access.
+ */
+ SCpnt->device->changed = 1;
+ SCpnt = scsi_end_request(SCpnt, 0, this_count);
+ return;
+ } else {
+ /*
+ * Must have been a power glitch, or a
+ * bus reset. Could not have been a
+ * media change, so we just retry the
+ * request and see what happens.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ return;
+ }
+ }
+ }
+ /* If we had an ILLEGAL REQUEST returned, then we may have
+ * performed an unsupported command. Typically this would be a
+ * ten byte read where only a six byte read is supported. Also,
+ * on a system where READ CAPACITY failed, we may have read
+ * past the end of the disk.
+ */
+
+ switch (SCpnt->sense_buffer[2]) {
+ case ILLEGAL_REQUEST:
+ if (SCpnt->device->ten) {
+ SCpnt->device->ten = 0;
+ /*
+ * This will cause a retry with a 6-byte
+ * command.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ result = 0;
+ } else {
+ SCpnt = scsi_end_request(SCpnt, 0, this_count);
+ return;
+ }
+ break;
+ case NOT_READY:
+ printk(KERN_INFO "Device %s not ready.\n",
+ kdevname(SCpnt->request.rq_dev));
+ SCpnt = scsi_end_request(SCpnt, 0, this_count);
+ return;
+ break;
+ case MEDIUM_ERROR:
+ case VOLUME_OVERFLOW:
+ printk("scsi%d: ERROR on channel %d, id %d, lun %d, CDB: ",
+ SCpnt->host->host_no, (int) SCpnt->channel,
+ (int) SCpnt->target, (int) SCpnt->lun);
+ print_command(SCpnt->cmnd);
+ print_sense("sd", SCpnt);
+ SCpnt = scsi_end_request(SCpnt, 0, block_sectors);
+ return;
+ default:
+ break;
+ }
+ } /* driver byte != 0 */
+ if (host_byte(result) == DID_RESET) {
+ /*
+ * Third party bus reset or reset for error
+ * recovery reasons. Just retry the request
+ * and see what happens.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ return;
+ }
+ if (result) {
+ struct Scsi_Device_Template *STpnt;
+
+ STpnt = scsi_get_request_dev(&SCpnt->request);
+ printk("SCSI %s error : host %d channel %d id %d lun %d return code = %x\n",
+ (STpnt ? STpnt->name : "device"),
+ SCpnt->device->host->host_no,
+ SCpnt->device->channel,
+ SCpnt->device->id,
+ SCpnt->device->lun, result);
+
+ if (driver_byte(result) & DRIVER_SENSE)
+ print_sense("sd", SCpnt);
+ /*
+ * Mark a single buffer as not uptodate. Queue the remainder.
+ * We sometimes get this cruft in the event that a medium error
+ * isn't properly reported.
+ */
+ SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors);
+ return;
+ }
+}
+
+/*
+ * Function: scsi_get_request_dev()
+ *
+ * Purpose: Find the upper-level driver that is responsible for this
+ * request
+ *
+ * Arguments: request - I/O request we are preparing to queue.
+ *
+ * Lock status: No locks assumed to be held, but as it happens the
+ * io_request_lock is held when this is called.
+ *
+ * Returns: The matching Scsi_Device_Template, or NULL if no
+ * upper-level driver claims this major number.
+ *
+ * Notes: The requests in the request queue may have originated
+ * from any block device driver. We need to find out which
+ * one so that we can later form the appropriate command.
+ */
+struct Scsi_Device_Template *scsi_get_request_dev(struct request *req)
+{
+ struct Scsi_Device_Template *spnt;
+ kdev_t dev = req->rq_dev;
+ int major = MAJOR(dev);
+
+ ASSERT_LOCK(&io_request_lock, 1);
+
+ for (spnt = scsi_devicelist; spnt; spnt = spnt->next) {
+ /*
+ * Search for a block device driver that supports this
+ * major.
+ */
+ if (spnt->blk && spnt->major == major) {
+ return spnt;
+ }
+ /*
+ * I am still not entirely satisfied with this solution,
+ * but it is good enough for now. Disks have a number of
+ * major numbers associated with them, the primary
+ * 8, which we test above, and a secondary range of 7
+ * different consecutive major numbers. If this ever
+ * becomes insufficient, then we could add another function
+ * to the structure, and generalize this completely.
+ */
+ if( spnt->min_major != 0
+ && spnt->max_major != 0
+ && major >= spnt->min_major
+ && major <= spnt->max_major )
+ {
+ return spnt;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Function: scsi_request_fn()
+ *
+ * Purpose: Generic version of request function for SCSI hosts.
+ *
+ * Arguments: q - Pointer to actual queue.
+ *
+ * Returns: Nothing
+ *
+ * Lock status: IO request lock assumed to be held when called.
+ *
+ * Notes: The theory is that this function is something which individual
+ * drivers could also supply if they wished to. The problem
+ * is that we have 30 some odd low-level drivers in the kernel
+ * tree already, and it would be most difficult to retrofit
+ * this crap into all of them. Thus this function has the job
+ * of acting as a generic queue manager for all of those existing
+ * drivers.
+ */
+void scsi_request_fn(request_queue_t * q)
+{
+ struct request *req;
+ Scsi_Cmnd *SCpnt;
+ Scsi_Request *SRpnt;
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+ struct Scsi_Device_Template *STpnt;
+
+ ASSERT_LOCK(&io_request_lock, 1);
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ if (!SDpnt) {
+ panic("Missing device");
+ }
+ SHpnt = SDpnt->host;
+
+ /*
+ * To start with, we keep looping until the queue is empty, or until
+ * the host is no longer able to accept any more requests.
+ */
+ for (;;) {
+ /*
+ * Check this again - each time we loop through we will have
+ * released the lock and grabbed it again, so each time
+ * we need to check to see if the queue is plugged or not.
+ */
+ if (SHpnt->in_recovery || q->plugged)
+ return;
+
+ /*
+ * If the device cannot accept another request, then quit.
+ */
+ if (SDpnt->device_blocked) {
+ break;
+ }
+ if ((SHpnt->can_queue > 0 && (SHpnt->host_busy >= SHpnt->can_queue))
+ || (SHpnt->host_blocked)
+ || (SHpnt->host_self_blocked)) {
+ /*
+ * If we are unable to process any commands at all for
+ * this device, then we consider it to be starved.
+ * What this means is that there are no outstanding
+ * commands for this device and hence we need a
+ * little help getting it started again
+ * once the host isn't quite so busy.
+ */
+ if (SDpnt->device_busy == 0) {
+ SDpnt->starved = 1;
+ SHpnt->some_device_starved = 1;
+ }
+ break;
+ } else {
+ SDpnt->starved = 0;
+ }
+
+ /*
+ * FIXME(eric)
+ * I am not sure where the best place to do this is. We need
+ * to hook in a place where we are likely to come if in user
+ * space. Technically the error handling thread should be
+ * doing this crap, but the error handler isn't used by
+ * most hosts.
+ */
+ if (SDpnt->was_reset) {
+ /*
+ * We need to relock the door, but we might
+ * be in an interrupt handler. Only do this
+ * from user space, since we do not want to
+ * sleep from an interrupt.
+ *
+ * FIXME(eric) - have the error handler thread do
+ * this work.
+ */
+ SDpnt->was_reset = 0;
+ if (SDpnt->removable && !in_interrupt()) {
+ spin_unlock_irq(&io_request_lock);
+ scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0);
+ spin_lock_irq(&io_request_lock);
+ continue;
+ }
+ }
+
+ /*
+ * If we couldn't find a request that could be queued, then we
+ * can also quit.
+ */
+ if (list_empty(&q->queue_head))
+ break;
+
+ /*
+ * Take the next request from the head of the queue; all of the
+ * requests in this queue are for the same device.
+ */
+ req = blkdev_entry_next_request(&q->queue_head);
+
+ /*
+ * Find the actual device driver associated with this command.
+ * The SPECIAL requests are things like character device or
+ * ioctls, which did not originate from ll_rw_blk. Note that
+ * the special field is also used to indicate the SCpnt for
+ * the remainder of a partially fulfilled request that can
+ * come up when there is a medium error. We have to treat
+ * these two cases differently. We differentiate by looking
+ * at request.cmd, as this tells us the real story.
+ */
+ if (req->cmd == SPECIAL) {
+ STpnt = NULL;
+ SCpnt = (Scsi_Cmnd *) req->special;
+ SRpnt = (Scsi_Request *) req->special;
+
+ if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) {
+ SCpnt = scsi_allocate_device(SRpnt->sr_device,
+ FALSE, FALSE);
+ if( !SCpnt ) {
+ break;
+ }
+ scsi_init_cmd_from_req(SCpnt, SRpnt);
+ }
+
+ } else {
+ SRpnt = NULL;
+ STpnt = scsi_get_request_dev(req);
+ if (!STpnt) {
+ panic("Unable to find device associated with request");
+ }
+ /*
+ * Now try and find a command block that we can use.
+ */
+ if( req->special != NULL ) {
+ SCpnt = (Scsi_Cmnd *) req->special;
+ /*
+ * We need to recount the number of
+ * scatter-gather segments here - the
+ * normal case code assumes this to be
+ * correct, as it would be a performance
+ * loss to always recount. Handling
+ * errors is always unusual, of course.
+ */
+ recount_segments(SCpnt);
+ } else {
+ SCpnt = scsi_allocate_device(SDpnt, FALSE, FALSE);
+ }
+ /*
+ * If so, we are ready to do something. Bump the count
+ * while the queue is locked and then break out of the
+ * loop. Otherwise loop around and try another request.
+ */
+ if (!SCpnt) {
+ break;
+ }
+ }
+
+ /*
+ * Now bump the usage count for both the host and the
+ * device.
+ */
+ SHpnt->host_busy++;
+ SDpnt->device_busy++;
+
+ /*
+ * Finally, before we release the lock, we copy the
+ * request to the command block, and remove the
+ * request from the request list. Note that we always
+ * operate on the queue head - there is absolutely no
+ * reason to search the list, because all of the commands
+ * in this queue are for the same device.
+ */
+ blkdev_dequeue_request(req);
+
+ if (req != &SCpnt->request && req != &SRpnt->sr_request ) {
+ memcpy(&SCpnt->request, req, sizeof(struct request));
+
+ /*
+ * We have copied the data out of the request block -
+ * it is now in a field in SCpnt. Release the request
+ * block.
+ */
+ blkdev_release_request(req);
+ }
+ /*
+ * Now it is finally safe to release the lock. We are
+ * not going to noodle the request list until this
+ * request has been queued and we loop back to queue
+ * another.
+ */
+ req = NULL;
+ spin_unlock_irq(&io_request_lock);
+
+ if (SCpnt->request.cmd != SPECIAL) {
+ /*
+ * This will do a couple of things:
+ * 1) Fill in the actual SCSI command.
+ * 2) Fill in any other upper-level specific fields
+ * (timeout).
+ *
+ * If this returns 0, it means that the request failed
+ * (reading past end of disk, reading offline device,
+ * etc). This won't actually talk to the device, but
+ * some kinds of consistency checking may cause the
+ * request to be rejected immediately.
+ */
+ if (STpnt == NULL) {
+ /* req was consumed above, so look the driver up
+ * from the copy held in the command block. */
+ STpnt = scsi_get_request_dev(&SCpnt->request);
+ }
+ /*
+ * This sets up the scatter-gather table (allocating if
+ * required). Hosts that need bounce buffers will also
+ * get those allocated here.
+ */
+ if (!SDpnt->scsi_init_io_fn(SCpnt)) {
+ SCpnt = __scsi_end_request(SCpnt, 0,
+ SCpnt->request.nr_sectors, 0, 0);
+ if( SCpnt != NULL )
+ {
+ panic("Should not have leftover blocks\n");
+ }
+ spin_lock_irq(&io_request_lock);
+ SHpnt->host_busy--;
+ SDpnt->device_busy--;
+ continue;
+ }
+ /*
+ * Initialize the actual SCSI command for this request.
+ */
+ if (!STpnt->init_command(SCpnt)) {
+ scsi_release_buffers(SCpnt);
+ SCpnt = __scsi_end_request(SCpnt, 0,
+ SCpnt->request.nr_sectors, 0, 0);
+ if( SCpnt != NULL )
+ {
+ panic("Should not have leftover blocks\n");
+ }
+ spin_lock_irq(&io_request_lock);
+ SHpnt->host_busy--;
+ SDpnt->device_busy--;
+ continue;
+ }
+ }
+ /*
+ * Finally, initialize any error handling parameters, and set up
+ * the timers for timeouts.
+ */
+ scsi_init_cmd_errh(SCpnt);
+
+ /*
+ * Dispatch the command to the low-level driver.
+ */
+ scsi_dispatch_cmd(SCpnt);
+
+ /*
+ * Now we need to grab the lock again. We are about to mess
+ * with the request queue and try to find another command.
+ */
+ spin_lock_irq(&io_request_lock);
+ }
+}
+
+/*
+ * Function: scsi_block_requests()
+ *
+ * Purpose: Utility function used by low-level drivers to prevent further
+ * commands from being queued to the device.
+ *
+ * Arguments: SHpnt - Host in question
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locks are assumed held.
+ *
+ * Notes: There is no timer nor any other means by which the requests
+ * get unblocked other than the low-level driver calling
+ * scsi_unblock_requests().
+ */
+void scsi_block_requests(struct Scsi_Host * SHpnt)
+{
+ SHpnt->host_self_blocked = TRUE;
+}
+
+/*
+ * Function: scsi_unblock_requests()
+ *
+ * Purpose: Utility function used by low-level drivers to allow further
+ * commands to be queued to the device.
+ *
+ * Arguments: SHpnt - Host in question
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locks are assumed held.
+ *
+ * Notes: There is no timer nor any other means by which the requests
+ * get unblocked other than the low-level driver calling
+ * scsi_unblock_requests().
+ *
+ * This is done as an API function so that changes to the
+ * internals of the scsi mid-layer won't require wholesale
+ * changes to drivers that use this feature.
+ */
+void scsi_unblock_requests(struct Scsi_Host * SHpnt)
+{
+ Scsi_Device *SDloop;
+
+ SHpnt->host_self_blocked = FALSE;
+ /* Now that we are unblocked, try to start the queues. */
+ for (SDloop = SHpnt->host_queue; SDloop; SDloop = SDloop->next)
+ scsi_queue_next_request(&SDloop->request_queue, NULL);
+}
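+
+/*
+ * Illustrative low-level driver usage (sketch): quiesce the host
+ * around an operation that must not see new commands:
+ *
+ *     scsi_block_requests(SHpnt);
+ *     ... reset the adapter, download firmware, etc. ...
+ *     scsi_unblock_requests(SHpnt);
+ */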
+
+/*
+ * Function: scsi_report_bus_reset()
+ *
+ * Purpose: Utility function used by low-level drivers to report that
+ * they have observed a bus reset on the bus being handled.
+ *
+ * Arguments: SHpnt - Host in question
+ * channel - channel on which reset was observed.
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locks are assumed held.
+ *
+ * Notes: This only needs to be called if the reset is one which
+ * originates from an unknown location. Resets originated
+ * by the mid-level itself don't need to call this, but there
+ * should be no harm.
+ *
+ * The main purpose of this is to make sure that a CHECK_CONDITION
+ * is properly treated.
+ */
+void scsi_report_bus_reset(struct Scsi_Host * SHpnt, int channel)
+{
+ Scsi_Device *SDloop;
+ for (SDloop = SHpnt->host_queue; SDloop; SDloop = SDloop->next) {
+ if (channel == SDloop->channel) {
+ SDloop->was_reset = 1;
+ SDloop->expecting_cc_ua = 1;
+ }
+ }
+}
+
+/*
+ * FIXME(eric) - these are empty stubs for the moment. I need to re-implement
+ * host blocking from scratch. The theory is that hosts that wish to block
+ * will register/deregister using these functions instead of the old way
+ * of setting the wish_block flag.
+ *
+ * The details of the implementation remain to be settled, however the
+ * stubs are here now so that the actual drivers will properly compile.
+ */
+void scsi_register_blocked_host(struct Scsi_Host * SHpnt)
+{
+}
+
+void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt)
+{
+}
diff --git a/xen/drivers/scsi/scsi_merge.c b/xen/drivers/scsi/scsi_merge.c
new file mode 100644
index 0000000000..92306b3ec0
--- /dev/null
+++ b/xen/drivers/scsi/scsi_merge.c
@@ -0,0 +1,1181 @@
+/*
+ * scsi_merge.c Copyright (C) 1999 Eric Youngdale
+ *
+ * SCSI queueing library.
+ * Initial versions: Eric Youngdale (eric@andante.org).
+ * Based upon conversations with large numbers
+ * of people at Linux Expo.
+ * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com).
+ */
+
+/*
+ * This file contains queue management functions that are used by SCSI.
+ * Typically this is used for several purposes. First, we need to ensure
+ * that commands do not grow so large that they cannot be handled all at
+ * once by a host adapter. The various flavors of merge functions included
+ * here serve this purpose.
+ *
+ * Note that it would be quite trivial to allow the low-level driver the
+ * flexibility to define its own queue handling functions. For the time
+ * being, the hooks are not present. Right now we are just using the
+ * data in the host template as an indicator of how we should be handling
+ * queues, and we select routines that are optimized for that purpose.
+ *
+ * Some hosts do not impose any restrictions on the size of a request.
+ * In such cases none of the merge functions in this file are called,
+ * and we allow ll_rw_blk to merge requests in the default manner.
+ * This isn't guaranteed to be optimal, but it should be pretty darned
+ * good. If someone comes up with ideas of better ways of managing queues
+ * to improve on the default behavior, then certainly fit it into this
+ * scheme in whatever manner makes the most sense. Please note that
+ * since each device has its own queue, we have considerable flexibility
+ * in queue management.
+ */
+
+#define __NO_VERSION__
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/* #include <xeno/string.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/ioport.h> */
+/* #include <xeno/kernel.h> */
+/* #include <xeno/stat.h> */
+#include <xeno/blk.h>
+/* #include <xeno/interrupt.h> */
+/* #include <xeno/delay.h> */
+/* #include <xeno/smp_lock.h> */
+
+
+#define __KERNEL_SYSCALLS__
+
+/* #include <xeno/unistd.h> */
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+#include <scsi/scsi_ioctl.h>
+
+/*
+ * This means that bounce buffers cannot be allocated in chunks > PAGE_SIZE.
+ * Ultimately we should get away from using a dedicated DMA bounce buffer
+ * pool, and try to use kmalloc() instead. If we can
+ * eliminate this pool, then this restriction would no longer be needed.
+ */
+#define DMA_SEGMENT_SIZE_LIMITED
+
+#ifdef CONFIG_SCSI_DEBUG_QUEUES
+/*
+ * Enable a bunch of additional consistency checking. Turn this off
+ * if you are benchmarking.
+ */
+static int dump_stats(struct request *req,
+ int use_clustering,
+ int dma_host,
+ int segments)
+{
+ struct buffer_head *bh;
+
+ /*
+ * Dump the information that we have. We know we have an
+ * inconsistency.
+ */
+ printk("nr_segments is %x\n", req->nr_segments);
+ printk("counted segments is %x\n", segments);
+ printk("Flags %d %d\n", use_clustering, dma_host);
+ for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext)
+ {
+ printk("Segment 0x%p, blocks %d, addr 0x%lx\n",
+ bh,
+ bh->b_size >> 9,
+ virt_to_phys(bh->b_data - 1));
+ }
+ panic("Ththththaats all folks. Too dangerous to continue.\n");
+}
+
+
+/*
+ * Simple sanity check that we will use for the first go around
+ * in order to ensure that we are doing the counting correctly.
+ * This can be removed for optimization.
+ */
+#define SANITY_CHECK(req, _CLUSTER, _DMA) \
+ if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) ) \
+ { \
+ printk("Incorrect segment count at 0x%p", current_text_addr()); \
+ dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \
+ }
+#else
+#define SANITY_CHECK(req, _CLUSTER, _DMA)
+#endif
+
+static void dma_exhausted(Scsi_Cmnd * SCpnt, int i)
+{
+ int jj;
+ struct scatterlist *sgpnt;
+ void **bbpnt;
+ int consumed = 0;
+
+ sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+ bbpnt = SCpnt->bounce_buffers;
+
+ /*
+ * Now print out a bunch of stats. First, start with the request
+ * size.
+ */
+ printk("dma_free_sectors:%d\n", scsi_dma_free_sectors);
+ printk("use_sg:%d\ti:%d\n", SCpnt->use_sg, i);
+ printk("request_bufflen:%d\n", SCpnt->request_bufflen);
+ /*
+ * Now dump the scatter-gather table, up to the point of failure.
+ */
+ for(jj=0; jj < SCpnt->use_sg; jj++)
+ {
+ printk("[%d]\tlen:%d\taddr:%p\tbounce:%p\n",
+ jj,
+ sgpnt[jj].length,
+ sgpnt[jj].address,
+ (bbpnt ? bbpnt[jj] : NULL));
+ if (bbpnt && bbpnt[jj])
+ consumed += sgpnt[jj].length;
+ }
+ printk("Total %d sectors consumed\n", consumed);
+ panic("DMA pool exhausted");
+}
+
+#define CLUSTERABLE_DEVICE(SH,SD) (SH->use_clustering)
+
+/*
+ * This entire source file deals with the new queueing code.
+ */
+
+/*
+ * Function: __count_segments()
+ *
+ * Purpose: Count the number of scatter-gather segments for a request.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * use_clustering - 1 if this host wishes to use clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ * remainder - used to track the residual size of the last
+ * segment. Comes in handy when we want to limit the
+ * size of bounce buffer segments to PAGE_SIZE.
+ *
+ * Returns: Count of the number of SG segments for the request.
+ *
+ * Lock status:
+ *
+ * Notes: Used by recount_segments(), by the merge functions when
+ * sizing bounce-buffer segments, and by the SANITY_CHECK code.
+ */
+__inline static int __count_segments(struct request *req,
+ int use_clustering,
+ int dma_host,
+ int * remainder)
+{
+ int ret = 1;
+ int reqsize = 0;
+ struct buffer_head *bh;
+ struct buffer_head *bhnext;
+
+ if( remainder != NULL ) {
+ reqsize = *remainder;
+ }
+
+ /*
+ * Add in the size increment for the first buffer.
+ */
+ bh = req->bh;
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ if( reqsize + bh->b_size > PAGE_SIZE ) {
+ ret++;
+ reqsize = bh->b_size;
+ } else {
+ reqsize += bh->b_size;
+ }
+#else
+ reqsize += bh->b_size;
+#endif
+
+ for (bh = req->bh, bhnext = bh->b_reqnext;
+ bhnext != NULL;
+ bh = bhnext, bhnext = bh->b_reqnext) {
+ if (use_clustering) {
+ /*
+ * See if we can do this without creating another
+ * scatter-gather segment. In the event that this is a
+ * DMA capable host, make sure that a segment doesn't span
+ * the DMA threshold boundary.
+ */
+ if (dma_host &&
+ virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ ret++;
+ reqsize = bhnext->b_size;
+ } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) {
+ /*
+ * This one is OK. Let it go.
+ */
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ /* Note scsi_malloc is only able to hand out
+ * chunks of memory in sizes of PAGE_SIZE or
+ * less. Thus we need to keep track of
+ * the size of the piece that we have
+ * seen so far, and if we have hit
+ * the limit of PAGE_SIZE, then we are
+ * kind of screwed and we need to start
+ * another segment.
+ */
+ if( dma_host
+ && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD
+ && reqsize + bhnext->b_size > PAGE_SIZE )
+ {
+ ret++;
+ reqsize = bhnext->b_size;
+ continue;
+ }
+#endif
+ reqsize += bhnext->b_size;
+ continue;
+ }
+ ret++;
+ reqsize = bhnext->b_size;
+ } else {
+ ret++;
+ reqsize = bhnext->b_size;
+ }
+ }
+ if( remainder != NULL ) {
+ *remainder = reqsize;
+ }
+ return ret;
+}
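+
+/*
+ * Worked example (illustrative): for a clustering host with no ISA
+ * DMA restrictions and a request of three 1K buffers where only the
+ * first two are physically contiguous,
+ *
+ *     __count_segments(req, 1, 0, NULL) == 2
+ *
+ * buffers 1 and 2 merge into one segment and buffer 3 starts another.
+ * Without clustering the same request would count 3 segments.
+ */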
+
+/*
+ * Function: recount_segments()
+ *
+ * Purpose: Recount the number of scatter-gather segments for this request.
+ *
+ * Arguments: req - request that needs recounting.
+ *
+ * Returns: Count of the number of SG segments for the request.
+ *
+ * Lock status: Irrelevant.
+ *
+ * Notes: This is only used when we have partially completed requests
+ * and the bit that is leftover is of an indeterminate size.
+ * This can come up if you get a MEDIUM_ERROR, for example,
+ * as we will have "completed" all of the sectors up to and
+ * including the bad sector, and the leftover bit is what
+ * we have to do now. This tends to be a rare occurrence, so
+ * we aren't busting our butts to instantiate separate versions
+ * of this function for the 4 different flag values. We
+ * probably should, however.
+ */
+void
+recount_segments(Scsi_Cmnd * SCpnt)
+{
+ struct request *req;
+ struct Scsi_Host *SHpnt;
+ Scsi_Device * SDpnt;
+
+ req = &SCpnt->request;
+ SHpnt = SCpnt->host;
+ SDpnt = SCpnt->device;
+
+ req->nr_segments = __count_segments(req,
+ CLUSTERABLE_DEVICE(SHpnt, SDpnt),
+ SHpnt->unchecked_isa_dma, NULL);
+}
+
+#define MERGEABLE_BUFFERS(X,Y) \
+(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \
+ (DMA_CHUNK_SIZE - 1)) == 0)
+
+#ifdef DMA_CHUNK_SIZE
+static inline int scsi_new_mergeable(request_queue_t * q,
+ struct request * req,
+ struct Scsi_Host *SHpnt,
+ int max_segments)
+{
+ /*
+ * pci_map_sg will be able to merge these two
+ * into a single hardware sg entry, check if
+ * we'll have enough memory for the sg list.
+ * scsi.c allocates for this purpose
+ * min(64,sg_tablesize) entries.
+ */
+ if (req->nr_segments >= max_segments ||
+ req->nr_segments >= SHpnt->sg_tablesize)
+ return 0;
+ req->nr_segments++;
+ return 1;
+}
+
+static inline int scsi_new_segment(request_queue_t * q,
+ struct request * req,
+ struct Scsi_Host *SHpnt,
+ int max_segments)
+{
+ /*
+ * pci_map_sg won't be able to map these two
+ * into a single hardware sg entry, so we have to
+ * check if things fit into sg_tablesize.
+ */
+ if (req->nr_hw_segments >= SHpnt->sg_tablesize ||
+ req->nr_segments >= SHpnt->sg_tablesize)
+ return 0;
+ req->nr_hw_segments++;
+ req->nr_segments++;
+ return 1;
+}
+#else
+static inline int scsi_new_segment(request_queue_t * q,
+ struct request * req,
+ struct Scsi_Host *SHpnt,
+ int max_segments)
+{
+ if (req->nr_segments < SHpnt->sg_tablesize &&
+ req->nr_segments < max_segments) {
+ /*
+ * This will form the start of a new segment. Bump the
+ * counter.
+ */
+ req->nr_segments++;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+#endif
+
+/*
+ * Function: __scsi_back_merge_fn() / __scsi_front_merge_fn()
+ *
+ * Purpose: Prototype for queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * bh - Block which we may wish to merge into request
+ * use_clustering - 1 if this host wishes to use clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ *
+ * Returns: 1 if it is OK to merge the block into the request. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Some drivers have limited scatter-gather table sizes, and
+ * thus they cannot queue an infinitely large command. This
+ * function is called from ll_rw_blk before it attempts to merge
+ * a new block into a request to make sure that the request will
+ * not become too large.
+ *
+ * This function is not designed to be directly called. Instead
+ * it should be referenced from other functions where the
+ * use_clustering and dma_host parameters should be integer
+ * constants. The compiler should thus be able to properly
+ * optimize the code, eliminating stuff that is irrelevant.
+ * It is more maintainable to do this way with a single function
+ * than to have 4 separate functions all doing roughly the
+ * same thing.
+ */
+__inline static int __scsi_back_merge_fn(request_queue_t * q,
+ struct request *req,
+ struct buffer_head *bh,
+ int max_segments,
+ int use_clustering,
+ int dma_host)
+{
+ unsigned int count;
+ unsigned int segment_size = 0;
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ SHpnt = SDpnt->host;
+
+#ifdef DMA_CHUNK_SIZE
+ if (max_segments > 64)
+ max_segments = 64;
+#endif
+
+ if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors)
+ return 0;
+
+ if (use_clustering) {
+ /*
+ * See if we can do this without creating another
+ * scatter-gather segment. In the event that this is a
+ * DMA capable host, make sure that a segment doesn't span
+ * the DMA threshold boundary.
+ */
+ if (dma_host &&
+ virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ goto new_end_segment;
+ }
+ if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ if( dma_host
+ && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) {
+ segment_size = 0;
+ count = __count_segments(req, use_clustering, dma_host, &segment_size);
+ if( segment_size + bh->b_size > PAGE_SIZE ) {
+ goto new_end_segment;
+ }
+ }
+#endif
+ /*
+ * This one is OK. Let it go.
+ */
+ return 1;
+ }
+ }
+ new_end_segment:
+#ifdef DMA_CHUNK_SIZE
+ if (MERGEABLE_BUFFERS(req->bhtail, bh))
+ return scsi_new_mergeable(q, req, SHpnt, max_segments);
+#endif
+ return scsi_new_segment(q, req, SHpnt, max_segments);
+}
+
+__inline static int __scsi_front_merge_fn(request_queue_t * q,
+ struct request *req,
+ struct buffer_head *bh,
+ int max_segments,
+ int use_clustering,
+ int dma_host)
+{
+ unsigned int count;
+ unsigned int segment_size = 0;
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ SHpnt = SDpnt->host;
+
+#ifdef DMA_CHUNK_SIZE
+ if (max_segments > 64)
+ max_segments = 64;
+#endif
+
+ if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors)
+ return 0;
+
+ if (use_clustering) {
+ /*
+ * See if we can do this without creating another
+ * scatter-gather segment. In the event that this is a
+ * DMA capable host, make sure that a segment doesn't span
+ * the DMA threshold boundary.
+ */
+ if (dma_host &&
+ virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ goto new_start_segment;
+ }
+ if (CONTIGUOUS_BUFFERS(bh, req->bh)) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ if( dma_host
+ && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) {
+ segment_size = bh->b_size;
+ count = __count_segments(req, use_clustering, dma_host, &segment_size);
+ if( count != req->nr_segments ) {
+ goto new_start_segment;
+ }
+ }
+#endif
+ /*
+ * This one is OK. Let it go.
+ */
+ return 1;
+ }
+ }
+ new_start_segment:
+#ifdef DMA_CHUNK_SIZE
+ if (MERGEABLE_BUFFERS(bh, req->bh))
+ return scsi_new_mergeable(q, req, SHpnt, max_segments);
+#endif
+ return scsi_new_segment(q, req, SHpnt, max_segments);
+}
+
+/*
+ * Function:    scsi_back_merge_fn_*() / scsi_front_merge_fn_*()
+ *
+ * Purpose: queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * bh - Block which we may wish to merge into request
+ *
+ * Returns: 1 if it is OK to merge the block into the request. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Optimized for different cases depending upon whether
+ * ISA DMA is in use and whether clustering should be used.
+ */
+#define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \
+static int _FUNCTION(request_queue_t * q, \
+ struct request * req, \
+ struct buffer_head * bh, \
+ int max_segments) \
+{ \
+ int ret; \
+ SANITY_CHECK(req, _CLUSTER, _DMA); \
+ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \
+ req, \
+ bh, \
+ max_segments, \
+ _CLUSTER, \
+ _DMA); \
+ return ret; \
+}
+
+/* Version with use_clustering 0 and dma_host 1 is not necessary,
+ * since the only use of dma_host above is protected by use_clustering.
+ */
+MERGEFCT(scsi_back_merge_fn_, back, 0, 0)
+MERGEFCT(scsi_back_merge_fn_c, back, 1, 0)
+MERGEFCT(scsi_back_merge_fn_dc, back, 1, 1)
+
+MERGEFCT(scsi_front_merge_fn_, front, 0, 0)
+MERGEFCT(scsi_front_merge_fn_c, front, 1, 0)
+MERGEFCT(scsi_front_merge_fn_dc, front, 1, 1)
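+
+/*
+ * Illustrative expansion (not part of the driver): with the MERGEFCT
+ * macro above, MERGEFCT(scsi_back_merge_fn_c, back, 1, 0) generates
+ *
+ *   static int scsi_back_merge_fn_c(request_queue_t * q,
+ *                                   struct request * req,
+ *                                   struct buffer_head * bh,
+ *                                   int max_segments)
+ *   {
+ *       int ret;
+ *       SANITY_CHECK(req, 1, 0);
+ *       ret = __scsi_back_merge_fn(q, req, bh, max_segments, 1, 0);
+ *       return ret;
+ *   }
+ *
+ * Because use_clustering and dma_host arrive as the constants 1 and 0,
+ * the compiler can drop the dead branches of __scsi_back_merge_fn().
+ */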
+
+/*
+ * Function: __scsi_merge_requests_fn()
+ *
+ * Purpose: Prototype for queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * next - 2nd request that we might want to combine with req
+ * use_clustering - 1 if this host wishes to use clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ *
+ * Returns: 1 if it is OK to merge the two requests. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Some drivers have limited scatter-gather table sizes, and
+ * thus they cannot queue an infinitely large command. This
+ * function is called from ll_rw_blk before it attempts to merge
+ * a new block into a request to make sure that the request will
+ * not become too large.
+ *
+ * This function is not designed to be directly called. Instead
+ * it should be referenced from other functions where the
+ * use_clustering and dma_host parameters should be integer
+ * constants. The compiler should thus be able to properly
+ * optimize the code, eliminating stuff that is irrelevant.
+ * It is more maintainable to do this way with a single function
+ * than to have 4 separate functions all doing roughly the
+ * same thing.
+ */
+__inline static int __scsi_merge_requests_fn(request_queue_t * q,
+ struct request *req,
+ struct request *next,
+ int max_segments,
+ int use_clustering,
+ int dma_host)
+{
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ /*
+ * First check if either of the requests is a re-queued
+ * request. We can't merge them if they are.
+ */
+ if (req->special || next->special)
+ return 0;
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ SHpnt = SDpnt->host;
+
+#ifdef DMA_CHUNK_SIZE
+ if (max_segments > 64)
+ max_segments = 64;
+
+ /* If it would not fit into prepared memory space for sg chain,
+ * then don't allow the merge.
+ */
+ if (req->nr_segments + next->nr_segments - 1 > max_segments ||
+ req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) {
+ return 0;
+ }
+ if (req->nr_hw_segments + next->nr_hw_segments - 1 > SHpnt->sg_tablesize) {
+ return 0;
+ }
+#else
+ /*
+ * If the two requests together are too large (even assuming that
+ * the boundary segments can be merged into one), then don't
+ * allow the merge.
+ */
+ if (req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) {
+ return 0;
+ }
+#endif
+
+ if ((req->nr_sectors + next->nr_sectors) > SHpnt->max_sectors)
+ return 0;
+
+ /*
+ * The main question is whether the two segments at the boundaries
+ * would be considered one or two.
+ */
+ if (use_clustering) {
+ /*
+ * See if we can do this without creating another
+ * scatter-gather segment. In the event that this is a
+ * DMA capable host, make sure that a segment doesn't span
+ * the DMA threshold boundary.
+ */
+ if (dma_host &&
+ virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ goto dont_combine;
+ }
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ /*
+ * We currently can only allocate scatter-gather bounce
+ * buffers in chunks of PAGE_SIZE or less.
+ */
+ if (dma_host
+ && CONTIGUOUS_BUFFERS(req->bhtail, next->bh)
+ && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD )
+ {
+ int segment_size = 0;
+ int count = 0;
+
+ count = __count_segments(req, use_clustering, dma_host, &segment_size);
+ count += __count_segments(next, use_clustering, dma_host, &segment_size);
+ if( count != req->nr_segments + next->nr_segments ) {
+ goto dont_combine;
+ }
+ }
+#endif
+ if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) {
+ /*
+ * This one is OK. Let it go.
+ */
+ req->nr_segments += next->nr_segments - 1;
+#ifdef DMA_CHUNK_SIZE
+ req->nr_hw_segments += next->nr_hw_segments - 1;
+#endif
+ return 1;
+ }
+ }
+ dont_combine:
+#ifdef DMA_CHUNK_SIZE
+ if (req->nr_segments + next->nr_segments > max_segments ||
+ req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) {
+ return 0;
+ }
+ /* If dynamic DMA mapping can merge last segment in req with
+ * first segment in next, then the check for hw segments was
+ * done above already, so we can always merge.
+ */
+ if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) {
+ req->nr_hw_segments += next->nr_hw_segments - 1;
+ } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) {
+ return 0;
+ } else {
+ req->nr_hw_segments += next->nr_hw_segments;
+ }
+ req->nr_segments += next->nr_segments;
+ return 1;
+#else
+ /*
+ * We know that the two requests at the boundary should not be combined.
+ * Make sure we can fit something that is the sum of the two.
+ * A slightly stricter test than we had above.
+ */
+ if (req->nr_segments + next->nr_segments > max_segments ||
+ req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) {
+ return 0;
+ } else {
+ /*
+ * This will form the start of a new segment. Bump the
+ * counter.
+ */
+ req->nr_segments += next->nr_segments;
+ return 1;
+ }
+#endif
+}
+
+/*
+ * Function: scsi_merge_requests_fn_()
+ *
+ * Purpose: queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * next - 2nd request that we might want to combine with req
+ *
+ * Returns: 1 if it is OK to merge the two requests. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Optimized for different cases depending upon whether
+ * ISA DMA is in use and whether clustering should be used.
+ */
+#define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \
+static int _FUNCTION(request_queue_t * q, \
+ struct request * req, \
+ struct request * next, \
+ int max_segments) \
+{ \
+ int ret; \
+ SANITY_CHECK(req, _CLUSTER, _DMA); \
+ ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \
+ return ret; \
+}
+
+/* Version with use_clustering 0 and dma_host 1 is not necessary,
+ * since the only use of dma_host above is protected by use_clustering.
+ */
+MERGEREQFCT(scsi_merge_requests_fn_, 0, 0)
+MERGEREQFCT(scsi_merge_requests_fn_c, 1, 0)
+MERGEREQFCT(scsi_merge_requests_fn_dc, 1, 1)
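+
+/*
+ * Illustrative note (not part of the driver): each MERGEREQFCT
+ * invocation above expands just like MERGEFCT does, e.g.
+ * scsi_merge_requests_fn_dc() reduces to a SANITY_CHECK(req, 1, 1)
+ * followed by __scsi_merge_requests_fn(q, req, next, max_segments, 1, 1),
+ * so the clustering and ISA DMA branches are again resolved at
+ * compile time.
+ */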
+/*
+ * Function: __init_io()
+ *
+ * Purpose: Prototype for io initialize function.
+ *
+ * Arguments: SCpnt - Command descriptor we wish to initialize
+ * sg_count_valid - 1 if the sg count in the req is valid.
+ * use_clustering - 1 if this host wishes to use clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ *
+ * Returns: 1 on success.
+ *
+ * Lock status:
+ *
+ * Notes: Only the SCpnt argument should be a non-constant variable.
+ * This function is designed in such a way that it will be
+ * invoked from a series of small stubs, each of which would
+ * be optimized for specific circumstances.
+ *
+ * The advantage of this is that hosts that don't do DMA
+ * get versions of the function that essentially don't have
+ * any of the DMA code. Same goes for clustering - in the
+ * case of hosts with no need for clustering, there is no point
+ * in a whole bunch of overhead.
+ *
+ * Finally, in the event that a host has set sg_tablesize to SG_ALL,
+ * implying that there is no limit to the length of a scatter
+ * gather list, the sg count in the request won't be valid
+ * (mainly because we don't need the queue management functions
+ * which keep the tally up to date).
+ */
+__inline static int __init_io(Scsi_Cmnd * SCpnt,
+ int sg_count_valid,
+ int use_clustering,
+ int dma_host)
+{
+ struct buffer_head * bh;
+ struct buffer_head * bhprev;
+ char * buff;
+ int count;
+ int i;
+ struct request * req;
+ int sectors;
+ struct scatterlist * sgpnt;
+ int this_count;
+ void ** bbpnt;
+
+ /*
+ * FIXME(eric) - don't inline this - it doesn't depend on the
+ * integer flags. Come to think of it, I don't think this is even
+ * needed any more. Need to play with it and see if we hit the
+ * panic. If not, then don't bother.
+ */
+ if (!SCpnt->request.bh) {
+ /*
+ * Case of page request (i.e. raw device), or unlinked buffer
+ * Typically used for swapping, but this isn't how we do
+ * swapping any more.
+ */
+ panic("I believe this is dead code. If we hit this, I was wrong");
+#if 0
+ SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9;
+ SCpnt->request_buffer = SCpnt->request.buffer;
+ SCpnt->use_sg = 0;
+ /*
+ * FIXME(eric) - need to handle DMA here.
+ */
+#endif
+ return 1;
+ }
+ req = &SCpnt->request;
+ /*
+ * First we need to know how many scatter gather segments are needed.
+ */
+ if (!sg_count_valid) {
+ count = __count_segments(req, use_clustering, dma_host, NULL);
+ } else {
+ count = req->nr_segments;
+ }
+
+ /*
+ * If the dma pool is nearly empty, then queue a minimal request
+ * with a single segment. Typically this will satisfy a single
+ * buffer.
+ */
+ if (dma_host && scsi_dma_free_sectors <= 10) {
+ this_count = SCpnt->request.current_nr_sectors;
+ goto single_segment;
+ }
+ /*
+ * Don't bother with scatter-gather if there is only one segment.
+ */
+ if (count == 1) {
+ this_count = SCpnt->request.nr_sectors;
+ goto single_segment;
+ }
+ SCpnt->use_sg = count;
+
+ /*
+ * Allocate the actual scatter-gather table itself.
+ */
+ SCpnt->sglist_len = (SCpnt->use_sg * sizeof(struct scatterlist));
+
+ /* If we could potentially require ISA bounce buffers, allocate
+ * space for this array here.
+ */
+ if (dma_host)
+ SCpnt->sglist_len += (SCpnt->use_sg * sizeof(void *));
+
+ /* scsi_malloc can only allocate in chunks of 512 bytes so
+ * round it up.
+ */
+ SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511;
+
+ sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len);
+
+ /*
+ * Now fill the scatter-gather table.
+ */
+ if (!sgpnt) {
+ /*
+ * If we cannot allocate the scatter-gather table, then
+ * simply write the first buffer all by itself.
+ */
+ printk("Warning - running *really* short on DMA buffers\n");
+ this_count = SCpnt->request.current_nr_sectors;
+ goto single_segment;
+ }
+ /*
+ * Next, walk the list, and fill in the addresses and sizes of
+ * each segment.
+ */
+ memset(sgpnt, 0, SCpnt->sglist_len);
+ SCpnt->request_buffer = (char *) sgpnt;
+ SCpnt->request_bufflen = 0;
+ bhprev = NULL;
+
+ if (dma_host)
+ bbpnt = (void **) ((char *)sgpnt +
+ (SCpnt->use_sg * sizeof(struct scatterlist)));
+ else
+ bbpnt = NULL;
+
+ SCpnt->bounce_buffers = bbpnt;
+
+ for (count = 0, bh = SCpnt->request.bh;
+ bh; bh = bh->b_reqnext) {
+ if (use_clustering && bhprev != NULL) {
+ if (dma_host &&
+ virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ /* Nothing - fall through */
+ } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) {
+ /*
+ * This one is OK. Let it go. Note that we
+ * do not have the ability to allocate
+ * bounce buffer segments > PAGE_SIZE, so
+ * for now we limit the thing.
+ */
+ if( dma_host ) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD
+ || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) {
+ sgpnt[count - 1].length += bh->b_size;
+ bhprev = bh;
+ continue;
+ }
+#else
+ sgpnt[count - 1].length += bh->b_size;
+ bhprev = bh;
+ continue;
+#endif
+ } else {
+ sgpnt[count - 1].length += bh->b_size;
+ SCpnt->request_bufflen += bh->b_size;
+ bhprev = bh;
+ continue;
+ }
+ }
+ }
+ count++;
+ sgpnt[count - 1].address = bh->b_data;
+ sgpnt[count - 1].page = NULL;
+ sgpnt[count - 1].length += bh->b_size;
+ if (!dma_host) {
+ SCpnt->request_bufflen += bh->b_size;
+ }
+ bhprev = bh;
+ }
+
+ /*
+ * Verify that the count is correct.
+ */
+ if (count != SCpnt->use_sg) {
+ printk("Incorrect number of segments after building list\n");
+#ifdef CONFIG_SCSI_DEBUG_QUEUES
+ dump_stats(req, use_clustering, dma_host, count);
+#endif
+ }
+ if (!dma_host) {
+ return 1;
+ }
+ /*
+ * Now allocate bounce buffers, if needed.
+ */
+ SCpnt->request_bufflen = 0;
+ for (i = 0; i < count; i++) {
+ sectors = (sgpnt[i].length >> 9);
+ SCpnt->request_bufflen += sgpnt[i].length;
+ if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 >
+ ISA_DMA_THRESHOLD) {
+ if( scsi_dma_free_sectors - sectors <= 10 ) {
+ /*
+ * If this would nearly drain the DMA
+ * pool empty, then let's stop here.
+ * Don't make this request any larger.
+ * This is kind of a safety valve that
+ * we use - we could get screwed later
+ * on if we run out completely.
+ */
+ SCpnt->request_bufflen -= sgpnt[i].length;
+ SCpnt->use_sg = i;
+ if (i == 0) {
+ goto big_trouble;
+ }
+ break;
+ }
+
+ bbpnt[i] = sgpnt[i].address;
+ sgpnt[i].address =
+ (char *) scsi_malloc(sgpnt[i].length);
+ /*
+ * If we cannot allocate memory for this DMA bounce
+ * buffer, then queue just what we have done so far.
+ */
+ if (sgpnt[i].address == NULL) {
+ printk("Warning - running low on DMA memory\n");
+ SCpnt->request_bufflen -= sgpnt[i].length;
+ SCpnt->use_sg = i;
+ if (i == 0) {
+ goto big_trouble;
+ }
+ break;
+ }
+ if (SCpnt->request.cmd == WRITE) {
+ memcpy(sgpnt[i].address, bbpnt[i],
+ sgpnt[i].length);
+ }
+ }
+ }
+ return 1;
+
+ big_trouble:
+ /*
+ * We come here in the event that we get one humongous
+ * request, where we need a bounce buffer, and the buffer is
+ * more than we can allocate in a single call to
+ * scsi_malloc(). In addition, we only come here when it is
+ * the 0th element of the scatter-gather table that gets us
+ * into this trouble. We fall back to non-scatter-gather
+ * and ask for a single segment. We make
+ * a half-hearted attempt to pick a reasonably large request
+ * size mainly so that we don't thrash the thing with
+ * iddy-biddy requests.
+ */
+
+ /*
+ * The original number of sectors in the 0th element of the
+ * scatter-gather table.
+ */
+ sectors = sgpnt[0].length >> 9;
+
+ /*
+ * Free up the original scatter-gather table. Note that since
+ * it was the 0th element that got us here, we don't have to
+ * go in and free up memory from the other slots.
+ */
+ SCpnt->request_bufflen = 0;
+ SCpnt->use_sg = 0;
+ scsi_free(SCpnt->request_buffer, SCpnt->sglist_len);
+
+ /*
+ * Make an attempt to pick up as much as we reasonably can.
+ * Just keep adding sectors until the pool starts running kind of
+ * low. The limit of 30 is somewhat arbitrary - the point is that
+ * it would kind of suck if we dropped down and limited ourselves to
+ * single-block requests if we had hundreds of free sectors.
+ */
+ if( scsi_dma_free_sectors > 30 ) {
+ for (this_count = 0, bh = SCpnt->request.bh;
+ bh; bh = bh->b_reqnext) {
+ if( scsi_dma_free_sectors - this_count < 30
+ || this_count == sectors )
+ {
+ break;
+ }
+ this_count += bh->b_size >> 9;
+ }
+
+ } else {
+ /*
+ * Yow! Take the absolute minimum here.
+ */
+ this_count = SCpnt->request.current_nr_sectors;
+ }
+
+ /*
+ * Now drop through into the single-segment case.
+ */
+
+ single_segment:
+ /*
+ * Come here if for any reason we choose to do this as a single
+ * segment. Possibly the entire request, or possibly a small
+ * chunk of the entire request.
+ */
+ bh = SCpnt->request.bh;
+ buff = SCpnt->request.buffer;
+
+ if (dma_host) {
+ /*
+ * Allocate a DMA bounce buffer. If the allocation fails, fall
+ * back and allocate a really small one - enough to satisfy
+ * the first buffer.
+ */
+ if (virt_to_phys(SCpnt->request.bh->b_data)
+ + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) {
+ buff = (char *) scsi_malloc(this_count << 9);
+ if (!buff) {
+ printk("Warning - running low on DMA memory\n");
+ this_count = SCpnt->request.current_nr_sectors;
+ buff = (char *) scsi_malloc(this_count << 9);
+ if (!buff) {
+ dma_exhausted(SCpnt, 0);
+ }
+ }
+ if (SCpnt->request.cmd == WRITE)
+ memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9);
+ }
+ }
+ SCpnt->request_bufflen = this_count << 9;
+ SCpnt->request_buffer = buff;
+ SCpnt->use_sg = 0;
+ return 1;
+}
+
+#define INITIO(_FUNCTION, _VALID, _CLUSTER, _DMA) \
+static int _FUNCTION(Scsi_Cmnd * SCpnt) \
+{ \
+ return __init_io(SCpnt, _VALID, _CLUSTER, _DMA); \
+}
+
+/*
+ * ll_rw_blk.c now keeps track of the number of segments in
+ * a request. Thus we don't have to do it any more here.
+ * We always force "_VALID" to 1. Eventually clean this up
+ * and get rid of the extra argument.
+ */
+INITIO(scsi_init_io_v, 1, 0, 0)
+INITIO(scsi_init_io_vd, 1, 0, 1)
+INITIO(scsi_init_io_vc, 1, 1, 0)
+INITIO(scsi_init_io_vdc, 1, 1, 1)
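+
+/*
+ * Illustrative expansion (not part of the driver): each INITIO
+ * invocation above generates a trivial stub, e.g.
+ *
+ *   static int scsi_init_io_vdc(Scsi_Cmnd * SCpnt)
+ *   {
+ *       return __init_io(SCpnt, 1, 1, 1);
+ *   }
+ *
+ * initialize_merge_fn() below installs one of these four stubs per
+ * device, so the constant-folded variant is selected once at setup
+ * time rather than on every command.
+ */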
+
+/*
+ * Function: initialize_merge_fn()
+ *
+ * Purpose: Initialize merge function for a host
+ *
+ * Arguments: SDpnt - Device descriptor.
+ *
+ * Returns: Nothing.
+ *
+ * Lock status:
+ *
+ * Notes:
+ */
+void initialize_merge_fn(Scsi_Device * SDpnt)
+{
+ request_queue_t *q;
+ struct Scsi_Host *SHpnt;
+ SHpnt = SDpnt->host;
+
+ q = &SDpnt->request_queue;
+
+ /*
+ * If the host has already selected a merge manager, then don't
+ * pick a new one.
+ */
+#if 0
+ if (q->back_merge_fn && q->front_merge_fn)
+ return;
+#endif
+ /*
+ * If this host has an unlimited tablesize, then don't bother with a
+ * merge manager. The whole point of the operation is to make sure
+ * that requests don't grow too large, and this host isn't picky.
+ *
+ * Note that ll_rw_blk.c is effectively maintaining a segment
+ * count which is only valid if clustering is used, and it obviously
+ * doesn't handle the DMA case. In the end, it
+ * is simply easier to do it ourselves with our own functions
+ * rather than rely upon the default behavior of ll_rw_blk.
+ */
+ if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) {
+ q->back_merge_fn = scsi_back_merge_fn_;
+ q->front_merge_fn = scsi_front_merge_fn_;
+ q->merge_requests_fn = scsi_merge_requests_fn_;
+ SDpnt->scsi_init_io_fn = scsi_init_io_v;
+ } else if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) {
+ q->back_merge_fn = scsi_back_merge_fn_;
+ q->front_merge_fn = scsi_front_merge_fn_;
+ q->merge_requests_fn = scsi_merge_requests_fn_;
+ SDpnt->scsi_init_io_fn = scsi_init_io_vd;
+ } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) {
+ q->back_merge_fn = scsi_back_merge_fn_c;
+ q->front_merge_fn = scsi_front_merge_fn_c;
+ q->merge_requests_fn = scsi_merge_requests_fn_c;
+ SDpnt->scsi_init_io_fn = scsi_init_io_vc;
+ } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) {
+ q->back_merge_fn = scsi_back_merge_fn_dc;
+ q->front_merge_fn = scsi_front_merge_fn_dc;
+ q->merge_requests_fn = scsi_merge_requests_fn_dc;
+ SDpnt->scsi_init_io_fn = scsi_init_io_vdc;
+ }
+}
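+
+/*
+ * Summary of the dispatch above (illustrative comment only):
+ *
+ *   clusterable  unchecked_isa_dma  merge fn suffix  init_io fn
+ *   -----------  -----------------  ---------------  ----------------
+ *   no           no                 _                scsi_init_io_v
+ *   no           yes                _                scsi_init_io_vd
+ *   yes          no                 _c               scsi_init_io_vc
+ *   yes          yes                _dc              scsi_init_io_vdc
+ */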
diff --git a/xen/drivers/scsi/scsi_module.c.inc b/xen/drivers/scsi/scsi_module.c.inc
new file mode 100644
index 0000000000..24099e0f56
--- /dev/null
+++ b/xen/drivers/scsi/scsi_module.c.inc
@@ -0,0 +1,71 @@
+/*
+ * scsi_module.c Copyright (1994, 1995) Eric Youngdale.
+ *
+ * Support for loading low-level scsi drivers using the linux kernel loadable
+ * module interface.
+ *
+ * To use, the host adapter should first define and initialize the variable
+ * driver_template (datatype Scsi_Host_Template), and then include this file.
+ * This should also be wrapped in a #ifdef MODULE/#endif.
+ *
+ * The low-level driver must also define a release function which will
+ * free any irq assignments, release any dma channels, release any I/O
+ * address space that might be reserved, and otherwise clean up after itself.
+ * The idea is that the same driver should be able to be reloaded without
+ * any difficulty. This makes debugging new drivers easier, as you should
+ * be able to load the driver, test it, unload, modify and reload.
+ *
+ * One *very* important caveat. If the driver may need to do DMA on the
+ * ISA bus, you must have unchecked_isa_dma set in the device template,
+ * even if this might be changed during the detect routine. This is
+ * because the shpnt structure will be allocated in a special way so that
+ * it will be below the appropriate DMA limit - thus if your driver uses
+ * the hostdata field of shpnt, and the board must be able to access this
+ * via DMA, the shpnt structure must be in a DMA accessible region of
+ * memory. This comment would be relevant for something like the buslogic
+ * driver where there are many boards, only some of which do DMA onto the
+ * ISA bus. There is no convenient way of specifying whether the host
+ * needs to be in an ISA DMA accessible region of memory when you call
+ * scsi_register.
+ */
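+
+/*
+ * Minimal usage sketch (hypothetical, for illustration only): a
+ * low-level driver built as a module would do something like
+ *
+ *   static Scsi_Host_Template driver_template = MY_TEMPLATE_INITIALIZER;
+ *   #include "scsi_module.c.inc"
+ *
+ * where MY_TEMPLATE_INITIALIZER stands for whatever Scsi_Host_Template
+ * initializer that driver defines; the module_init()/module_exit()
+ * stubs below then register and unregister the template.
+ */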
+
+#include <xeno/module.h>
+#include <linux/init.h>
+
+static int __init init_this_scsi_driver(void)
+{
+ driver_template.module = THIS_MODULE;
+ scsi_register_module(MODULE_SCSI_HA, &driver_template);
+ if (driver_template.present)
+ return 0;
+
+ scsi_unregister_module(MODULE_SCSI_HA, &driver_template);
+ return -ENODEV;
+}
+
+static void __exit exit_this_scsi_driver(void)
+{
+ scsi_unregister_module(MODULE_SCSI_HA, &driver_template);
+}
+
+module_init(init_this_scsi_driver);
+module_exit(exit_this_scsi_driver);
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_obsolete.h b/xen/drivers/scsi/scsi_obsolete.h
new file mode 100644
index 0000000000..abeacb996e
--- /dev/null
+++ b/xen/drivers/scsi/scsi_obsolete.h
@@ -0,0 +1,106 @@
+/*
+ * scsi_obsolete.h Copyright (C) 1997 Eric Youngdale
+ *
+ */
+
+#ifndef _SCSI_OBSOLETE_H
+#define _SCSI_OBSOLETE_H
+
+/*
+ * These are the return codes for the abort and reset functions. The mid-level
+ * code uses these to decide what to do next. Each of the low level abort
+ * and reset functions must correctly indicate what it has done.
+ * The descriptions are written from the point of view of the mid-level code,
+ * so that the return code is telling the mid-level drivers exactly what
+ * the low level driver has already done, and what remains to be done.
+ */
+
+/* We did not do anything.
+ * Wait some more for this command to complete, and if this does not work,
+ * try something more serious. */
+#define SCSI_ABORT_SNOOZE 0
+
+/* This means that we were able to abort the command. We have already
+ * called the mid-level done function, and do not expect an interrupt that
+ * will lead to another call to the mid-level done function for this command */
+#define SCSI_ABORT_SUCCESS 1
+
+/* We called for an abort of this command, and we should get an interrupt
+ * when this succeeds. Thus we should not restore the timer for this
+ * command in the mid-level abort function. */
+#define SCSI_ABORT_PENDING 2
+
+/* Unable to abort - command is currently on the bus. Grin and bear it. */
+#define SCSI_ABORT_BUSY 3
+
+/* The command is not active in the low level code. Command probably
+ * finished. */
+#define SCSI_ABORT_NOT_RUNNING 4
+
+/* Something went wrong. The low level driver will indicate the correct
+ * error condition when it calls scsi_done, so the mid-level abort function
+ * can simply wait until this comes through */
+#define SCSI_ABORT_ERROR 5
+
+/* We do not know how to reset the bus, or we do not want to. Bummer.
+ * Anyway, just wait a little more for the command in question, and hope that
+ * it eventually finishes. If it never finishes, the SCSI device could
+ * hang, so use this with caution. */
+#define SCSI_RESET_SNOOZE 0
+
+/* We do not know how to reset the bus, or we do not want to. Bummer.
+ * We have given up on this ever completing. The mid-level code will
+ * request sense information to decide how to proceed from here. */
+#define SCSI_RESET_PUNT 1
+
+/* This means that we were able to reset the bus. We have restarted all of
+ * the commands that should be restarted, and we should be able to continue
+ * on normally from here. We do not expect any interrupts that will return
+ * DID_RESET to any of the other commands in the host_queue, and the mid-level
+ * code does not need to do anything special to keep the commands alive.
+ * If a hard reset was performed then all outstanding commands on the
+ * bus have been restarted. */
+#define SCSI_RESET_SUCCESS 2
+
+/* We called for a reset of this bus, and we should get an interrupt
+ * when this succeeds. Each command should get its own status
+ * passed up to scsi_done, but this has not happened yet.
+ * If a hard reset was performed, then we expect an interrupt
+ * for *each* of the outstanding commands that will have the
+ * effect of restarting the commands.
+ */
+#define SCSI_RESET_PENDING 3
+
+/* We did a reset, but do not expect an interrupt to signal DID_RESET.
+ * This tells the upper level code to request the sense info, and this
+ * should keep the command alive. */
+#define SCSI_RESET_WAKEUP 4
+
+/* The command is not active in the low level code. Command probably
+ finished. */
+#define SCSI_RESET_NOT_RUNNING 5
+
+/* Something went wrong, and we do not know how to fix it. */
+#define SCSI_RESET_ERROR 6
+
+#define SCSI_RESET_SYNCHRONOUS 0x01
+#define SCSI_RESET_ASYNCHRONOUS 0x02
+#define SCSI_RESET_SUGGEST_BUS_RESET 0x04
+#define SCSI_RESET_SUGGEST_HOST_RESET 0x08
+/*
+ * This is a bitmask that is ored with one of the above codes.
+ * It tells the mid-level code that we did a hard reset.
+ */
+#define SCSI_RESET_BUS_RESET 0x100
+/*
+ * This is a bitmask that is ored with one of the above codes.
+ * It tells the mid-level code that we did a host adapter reset.
+ */
+#define SCSI_RESET_HOST_RESET 0x200
+/*
+ * Used to mask off bits and to obtain the basic action that was
+ * performed.
+ */
+#define SCSI_RESET_ACTION 0xff
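+
+/*
+ * Illustrative example (not part of this header): a low-level reset
+ * handler that performed a hard bus reset and restarted all commands
+ * might return
+ *
+ *   return SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET;
+ *
+ * and the mid-level code would then recover the basic action with
+ *
+ *   switch (rtn & SCSI_RESET_ACTION) { case SCSI_RESET_SUCCESS: ... }
+ */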
+
+#endif /* SCSI_OBSOLETE_H */
diff --git a/xen/drivers/scsi/scsi_proc.c b/xen/drivers/scsi/scsi_proc.c
new file mode 100644
index 0000000000..41a5f7cc49
--- /dev/null
+++ b/xen/drivers/scsi/scsi_proc.c
@@ -0,0 +1,329 @@
+/*
+ * linux/drivers/scsi/scsi_proc.c
+ *
+ * The functions in this file provide an interface between
+ * the PROC file system and the SCSI device drivers
+ * It is mainly used for debugging, statistics and to pass
+ * information directly to the lowlevel driver.
+ *
+ * (c) 1995 Michael Neuffer neuffer@goofy.zdv.uni-mainz.de
+ * Version: 0.99.8 last change: 95/09/13
+ *
+ * generic command parser provided by:
+ * Andreas Heilwagen <crashcar@informatik.uni-koblenz.de>
+ *
+ * generic_proc_info() support of xxxx_info() by:
+ * Michael A. Griffith <grif@acm.org>
+ */
+
+#include <xeno/config.h> /* for CONFIG_PROC_FS */
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+/* #include <xeno/string.h> */
+/* #include <xeno/mm.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/proc_fs.h> */
+/* #include <xeno/errno.h> */
+/* #include <xeno/stat.h> */
+#include <xeno/blk.h>
+
+#include <asm/uaccess.h>
+
+#include "scsi.h"
+#include "hosts.h"
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+/* generic_proc_info
+ * Used if the driver currently has no support of its own for /proc/scsi
+ */
+int generic_proc_info(char *buffer, char **start, off_t offset, int length,
+ const char *(*info) (struct Scsi_Host *),
+ struct Scsi_Host *sh)
+{
+ int len, pos, begin;
+
+ begin = 0;
+ if (info && sh) {
+ pos = len = sprintf(buffer, "%s\n", info(sh));
+ } else {
+ pos = len = sprintf(buffer,
+ "The driver does not yet support the proc-fs\n");
+ }
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
+ }
+ *start = buffer + (offset - begin); /* Start of wanted data */
+ len -= (offset - begin);
+ if (len > length)
+ len = length;
+
+ return (len);
+}
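+
+/*
+ * Worked example of the offset arithmetic above (illustrative only):
+ * for a read at offset 10 with length 100 of a 30-byte info string,
+ * pos ends up 30 so the (pos < offset) branch is skipped, *start
+ * points at buffer + 10, and len becomes 20, so the caller receives
+ * the remaining 20 bytes of the string.
+ */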
+
+/* dispatch_scsi_info is the central dispatcher
+ * It is the interface between the proc-fs and the SCSI subsystem code
+ */
+static int proc_scsi_read(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ struct Scsi_Host *hpnt = data;
+ int n;
+
+ if (hpnt->hostt->proc_info == NULL)
+ n = generic_proc_info(buffer, start, offset, length,
+ hpnt->hostt->info, hpnt);
+ else
+ n = (hpnt->hostt->proc_info(buffer, start, offset,
+ length, hpnt->host_no, 0));
+ *eof = (n<length);
+ return n;
+}
+
+#define PROC_BLOCK_SIZE (3*1024) /* 4K page size, but our output routines
+ * use some slack for overruns
+ */
+
+static int proc_scsi_write(struct file * file, const char * buf,
+ unsigned long count, void *data)
+{
+ struct Scsi_Host *hpnt = data;
+ ssize_t ret = 0;
+ char * page;
+ char *start;
+
+    if (hpnt->hostt->proc_info == NULL)
+        return -ENOSYS;
+
+ if (count > PROC_BLOCK_SIZE)
+ return -EOVERFLOW;
+
+ if (!(page = (char *) __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ if(copy_from_user(page, buf, count))
+ {
+ free_page((ulong) page);
+ return -EFAULT;
+ }
+
+ ret = hpnt->hostt->proc_info(page, &start, 0, count,
+ hpnt->host_no, 1);
+
+ free_page((ulong) page);
+ return(ret);
+}
+
+void build_proc_dir_entries(Scsi_Host_Template * tpnt)
+{
+ struct Scsi_Host *hpnt;
+ char name[10]; /* see scsi_unregister_host() */
+
+ tpnt->proc_dir = proc_mkdir(tpnt->proc_name, proc_scsi);
+ if (!tpnt->proc_dir) {
+ printk(KERN_ERR "Unable to proc_mkdir in scsi.c/build_proc_dir_entries");
+ return;
+ }
+ tpnt->proc_dir->owner = tpnt->module;
+
+ hpnt = scsi_hostlist;
+ while (hpnt) {
+ if (tpnt == hpnt->hostt) {
+ struct proc_dir_entry *p;
+ sprintf(name,"%d",hpnt->host_no);
+ p = create_proc_read_entry(name,
+ S_IFREG | S_IRUGO | S_IWUSR,
+ tpnt->proc_dir,
+ proc_scsi_read,
+ (void *)hpnt);
+ if (!p)
+ panic("Not enough memory to register SCSI HBA in /proc/scsi !\n");
+ p->write_proc=proc_scsi_write;
+ p->owner = tpnt->module;
+ }
+ hpnt = hpnt->next;
+ }
+}
+
+/*
+ * parseHandle *parseInit(char *buf, char *cmdList, int cmdNum);
+ * gets a pointer to a null terminated data buffer
+ * and a list of commands with blanks as delimiter
+ * in between.
+ * The commands have to be alphanumerically sorted.
+ * cmdNum has to contain the number of commands.
+ * On success, a pointer to a handle structure
+ * is returned, NULL on failure
+ *
+ * int parseOpt(parseHandle *handle, char **param);
+ * processes the next parameter. On success, the
+ * index of the appropriate command in the cmdList
+ * is returned, starting with zero.
+ * param points to the null terminated parameter string.
+ * On failure, -1 is returned.
+ *
+ * The data buffer buf may only contain pairs of commands and
+ * options, separated by blanks:
+ * <Command> <Parameter> [<Command> <Parameter>]*
+ */
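+
+/*
+ * Hypothetical usage sketch (names invented for illustration):
+ *
+ *   char cmds[] = "alpha beta";   (writable, alphabetically sorted)
+ *   char *param;
+ *   int idx;
+ *   parseHandle *h = parseInit(buf, cmds, 2);
+ *
+ *   while (h && (idx = parseOpt(h, &param)) >= 0)
+ *       printk("cmd %d param %s\n", idx, param);
+ *
+ * Note that parseInit() writes NULs into cmds, and parseOpt() frees
+ * the handle and returns -1 once the buffer is exhausted, so the
+ * handle must not be used again afterwards.
+ */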
+
+typedef struct {
+ char *buf, /* command buffer */
+ *cmdList, /* command list */
+ *bufPos, /* actual position */
+ **cmdPos, /* cmdList index */
+ cmdNum; /* cmd number */
+} parseHandle;
+
+inline int parseFree(parseHandle * handle)
+{ /* free memory */
+ kfree(handle->cmdPos);
+ kfree(handle);
+
+ return -1;
+}
+
+parseHandle *parseInit(char *buf, char *cmdList, int cmdNum)
+{
+ char *ptr; /* temp pointer */
+ parseHandle *handle; /* new handle */
+
+ if (!buf || !cmdList) /* bad input ? */
+ return NULL;
+ handle = (parseHandle *) kmalloc(sizeof(parseHandle), GFP_KERNEL);
+ if (!handle)
+ return NULL; /* out of memory */
+    handle->cmdPos = (char **) kmalloc(sizeof(char *) * cmdNum, GFP_KERNEL);
+ if (!handle->cmdPos) {
+ kfree(handle);
+ return NULL; /* out of memory */
+ }
+ handle->buf = handle->bufPos = buf; /* init handle */
+ handle->cmdList = cmdList;
+ handle->cmdNum = cmdNum;
+
+ handle->cmdPos[cmdNum = 0] = cmdList;
+ for (ptr = cmdList; *ptr; ptr++) { /* scan command string */
+ if (*ptr == ' ') { /* and insert zeroes */
+ *ptr++ = 0;
+ handle->cmdPos[++cmdNum] = ptr++;
+ }
+ }
+ return handle;
+}
+
+int parseOpt(parseHandle * handle, char **param)
+{
+ int cmdIndex = 0, cmdLen = 0;
+ char *startPos;
+
+ if (!handle) /* invalid handle */
+ return (parseFree(handle));
+ /* skip spaces */
+ for (; *(handle->bufPos) && *(handle->bufPos) == ' '; handle->bufPos++);
+ if (!*(handle->bufPos))
+ return (parseFree(handle)); /* end of data */
+
+ startPos = handle->bufPos; /* store cmd start */
+ for (; handle->cmdPos[cmdIndex][cmdLen] && *(handle->bufPos); handle->bufPos++) { /* no string end? */
+ for (;;) {
+ if (*(handle->bufPos) == handle->cmdPos[cmdIndex][cmdLen])
+ break; /* char matches ? */
+ else if (memcmp(startPos, (char *) (handle->cmdPos[++cmdIndex]), cmdLen))
+ return (parseFree(handle)); /* unknown command */
+
+ if (cmdIndex >= handle->cmdNum)
+ return (parseFree(handle)); /* unknown command */
+ }
+
+ cmdLen++; /* next char */
+ }
+
+ /* Get param. First skip all blanks, then insert zero after param */
+
+ for (; *(handle->bufPos) && *(handle->bufPos) == ' '; handle->bufPos++);
+ *param = handle->bufPos;
+
+ for (; *(handle->bufPos) && *(handle->bufPos) != ' '; handle->bufPos++);
+ *(handle->bufPos++) = 0;
+
+ return (cmdIndex);
+}
+
+void proc_print_scsidevice(Scsi_Device * scd, char *buffer, int *size, int len)
+{
+
+ int x, y = *size;
+ extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
+
+ y = sprintf(buffer + len,
+ "Host: scsi%d Channel: %02d Id: %02d Lun: %02d\n Vendor: ",
+ scd->host->host_no, scd->channel, scd->id, scd->lun);
+ for (x = 0; x < 8; x++) {
+ if (scd->vendor[x] >= 0x20)
+ y += sprintf(buffer + len + y, "%c", scd->vendor[x]);
+ else
+ y += sprintf(buffer + len + y, " ");
+ }
+ y += sprintf(buffer + len + y, " Model: ");
+ for (x = 0; x < 16; x++) {
+ if (scd->model[x] >= 0x20)
+ y += sprintf(buffer + len + y, "%c", scd->model[x]);
+ else
+ y += sprintf(buffer + len + y, " ");
+ }
+ y += sprintf(buffer + len + y, " Rev: ");
+ for (x = 0; x < 4; x++) {
+ if (scd->rev[x] >= 0x20)
+ y += sprintf(buffer + len + y, "%c", scd->rev[x]);
+ else
+ y += sprintf(buffer + len + y, " ");
+ }
+ y += sprintf(buffer + len + y, "\n");
+
+ y += sprintf(buffer + len + y, " Type: %s ",
+ scd->type < MAX_SCSI_DEVICE_CODE ?
+ scsi_device_types[(int) scd->type] : "Unknown ");
+ y += sprintf(buffer + len + y, " ANSI"
+ " SCSI revision: %02x", (scd->scsi_level - 1) ? scd->scsi_level - 1 : 1);
+ if (scd->scsi_level == 2)
+ y += sprintf(buffer + len + y, " CCS\n");
+ else
+ y += sprintf(buffer + len + y, "\n");
+
+ *size = y;
+ return;
+}
+
+#else /* if !CONFIG_PROC_FS */
+
+void proc_print_scsidevice(Scsi_Device * scd, char *buffer, int *size, int len)
+{
+}
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Overrides for Emacs so that we get a uniform tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_queue.c b/xen/drivers/scsi/scsi_queue.c
new file mode 100644
index 0000000000..ce790c9d11
--- /dev/null
+++ b/xen/drivers/scsi/scsi_queue.c
@@ -0,0 +1,151 @@
+/*
+ * scsi_queue.c Copyright (C) 1997 Eric Youngdale
+ *
+ * generic mid-level SCSI queueing.
+ *
+ * The point of this is that we need to track when hosts are unable to
+ * accept a command because they are busy. In addition, we track devices
+ * that cannot accept a command because of a QUEUE_FULL condition. In both
+ * of these cases, we enter the command in the queue. At some later point,
+ * we attempt to remove commands from the queue and retry them.
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/* #include <xeno/string.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/ioport.h> */
+/* #include <xeno/kernel.h> */
+/* #include <xeno/stat.h> */
+#include <xeno/blk.h>
+/* #include <xeno/interrupt.h> */
+/* #include <xeno/delay.h> */
+/* #include <xeno/smp_lock.h> */
+
+#define __KERNEL_SYSCALLS__
+
+/*#include <xeno/unistd.h>*/
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+/*
+ * TODO:
+ * 1) Prevent multiple traversals of list to look for commands to
+ * queue.
+ * 2) Protect against multiple insertions of list at the same time.
+ * DONE:
+ * 1) Set state of scsi command to a new state value for ml queue.
+ * 2) Insert into queue when host rejects command.
+ * 3) Make sure status code is properly passed from low-level queue func
+ * so that internal_cmnd properly returns the right value.
+ * 4) Insert into queue when QUEUE_FULL.
+ * 5) Cull queue in bottom half handler.
+ * 6) Check usage count prior to queue insertion. Requeue if usage
+ * count is 0.
+ * 7) Don't send down any more commands if the host/device is busy.
+ */
+
+static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_queue.c,v 1.1 1997/10/21 11:16:38 eric Exp $";
+
+
+/*
+ * Function: scsi_mlqueue_insert()
+ *
+ * Purpose: Insert a command in the midlevel queue.
+ *
+ * Arguments: cmd - command that we are adding to queue.
+ * reason - why we are inserting command to queue.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: We do this for one of two cases. Either the host is busy
+ * and it cannot accept any more commands for the time being,
+ * or the device returned QUEUE_FULL and can accept no more
+ * commands.
+ *              This could be called either from an interrupt context or a
+ * normal process context.
+ */
+int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason)
+{
+ struct Scsi_Host *host;
+ unsigned long flags;
+
+ SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd));
+
+ /*
+ * We are inserting the command into the ml queue. First, we
+ * cancel the timer, so it doesn't time out.
+ */
+ scsi_delete_timer(cmd);
+
+ host = cmd->host;
+
+ /*
+ * Next, set the appropriate busy bit for the device/host.
+ */
+ if (reason == SCSI_MLQUEUE_HOST_BUSY) {
+ /*
+ * Protect against race conditions. If the host isn't busy,
+ * assume that something actually completed, and that we should
+ * be able to queue a command now. Note that there is an implicit
+ * assumption that every host can always queue at least one command.
+ * If a host is inactive and cannot queue any commands, I don't see
+         * how things could possibly work anyway.
+ */
+ if (host->host_busy == 0) {
+ if (scsi_retry_command(cmd) == 0) {
+ return 0;
+ }
+ }
+ host->host_blocked = TRUE;
+ } else {
+ /*
+ * Protect against race conditions. If the device isn't busy,
+ * assume that something actually completed, and that we should
+ * be able to queue a command now. Note that there is an implicit
+         * assumption that every device can always queue at least one command.
+         * If a device is inactive and cannot queue any commands, I don't see
+         * how things could possibly work anyway.
+ */
+ if (cmd->device->device_busy == 0) {
+ if (scsi_retry_command(cmd) == 0) {
+ return 0;
+ }
+ }
+ cmd->device->device_blocked = TRUE;
+ }
+
+ /*
+ * Register the fact that we own the thing for now.
+ */
+ cmd->state = SCSI_STATE_MLQUEUE;
+ cmd->owner = SCSI_OWNER_MIDLEVEL;
+ cmd->bh_next = NULL;
+
+ /*
+ * Decrement the counters, since these commands are no longer
+ * active on the host/device.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ cmd->host->host_busy--;
+ cmd->device->device_busy--;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ /*
+     * Insert this command at the head of the queue for its device.
+ * It will go before all other commands that are already in the queue.
+ */
+ scsi_insert_special_cmd(cmd, 1);
+ return 0;
+}
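+
+/*
+ * Illustrative call pattern (an assumption about typical use, not code
+ * in this file): when a host rejects a command, the mid-level would
+ * requeue it with
+ *
+ *   scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY);
+ *
+ * and use the device-busy reason code instead when the device itself
+ * reported QUEUE_FULL.
+ */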
diff --git a/xen/drivers/scsi/scsi_scan.c b/xen/drivers/scsi/scsi_scan.c
new file mode 100644
index 0000000000..04f4715992
--- /dev/null
+++ b/xen/drivers/scsi/scsi_scan.c
@@ -0,0 +1,906 @@
+/*
+ * scsi_scan.c Copyright (C) 2000 Eric Youngdale
+ *
+ * Bus scan logic.
+ *
+ * This used to live in scsi.c, but that file was just a laundry basket
+ * full of misc stuff. This got separated out in order to make things
+ * clearer.
+ */
+
+#define __NO_VERSION__
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/init.h>
+
+#include <xeno/blk.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
+
+/*
+ * Flags for irregular SCSI devices that need special treatment
+ */
+#define BLIST_NOLUN 0x001 /* Don't scan for LUNs */
+#define BLIST_FORCELUN 0x002 /* Known to have LUNs, force scanning */
+#define BLIST_BORKEN 0x004 /* Flag for broken handshaking */
+#define BLIST_KEY 0x008 /* Needs to be unlocked by special command */
+#define BLIST_SINGLELUN 0x010 /* LUNs should better not be used in parallel */
+#define BLIST_NOTQ 0x020 /* Buggy Tagged Command Queuing */
+#define BLIST_SPARSELUN 0x040 /* Non-consecutive LUN numbering */
+#define BLIST_MAX5LUN 0x080 /* Avoid LUNS >= 5 */
+#define BLIST_ISDISK 0x100 /* Treat as (removable) disk */
+#define BLIST_ISROM 0x200 /* Treat as (removable) CD-ROM */
+#define BLIST_LARGELUN 0x400 /* LUNs larger than 7 despite reporting as SCSI 2 */
+
+static void print_inquiry(unsigned char *data);
+static int scan_scsis_single(unsigned int channel, unsigned int dev,
+ unsigned int lun, int lun0_scsi_level,
+ unsigned int *max_scsi_dev, unsigned int *sparse_lun,
+ Scsi_Device ** SDpnt, struct Scsi_Host *shpnt,
+ char *scsi_result);
+static int find_lun0_scsi_level(unsigned int channel, unsigned int dev,
+ struct Scsi_Host *shpnt);
+
+struct dev_info {
+ const char *vendor;
+ const char *model;
+ const char *revision; /* Latest revision known to be bad. Not used yet */
+ unsigned flags;
+};
+
+/*
+ * This is what was previously known as the blacklist. The concept
+ * has been expanded so that we can specify other types of things we
+ * need to be aware of.
+ */
+static struct dev_info device_list[] =
+{
+/* The following devices are known not to tolerate a lun != 0 scan for
+ * one reason or another. Some will respond to all luns, others will
+ * lock up.
+ */
+ {"Aashima", "IMAGERY 2400SP", "1.03", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"CHINON", "CD-ROM CDS-431", "H42", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"CHINON", "CD-ROM CDS-535", "Q14", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"DENON", "DRD-25X", "V", BLIST_NOLUN}, /* Locks up if probed for lun != 0 */
+ {"HITACHI", "DK312C", "CM81", BLIST_NOLUN}, /* Responds to all lun - dtg */
+ {"HITACHI", "DK314C", "CR21", BLIST_NOLUN}, /* responds to all lun */
+ {"IMS", "CDD521/10", "2.06", BLIST_NOLUN}, /* Locks-up when LUN>0 polled. */
+ {"MAXTOR", "XT-3280", "PR02", BLIST_NOLUN}, /* Locks-up when LUN>0 polled. */
+ {"MAXTOR", "XT-4380S", "B3C", BLIST_NOLUN}, /* Locks-up when LUN>0 polled. */
+ {"MAXTOR", "MXT-1240S", "I1.2", BLIST_NOLUN}, /* Locks up when LUN>0 polled */
+ {"MAXTOR", "XT-4170S", "B5A", BLIST_NOLUN}, /* Locks-up sometimes when LUN>0 polled. */
+ {"MAXTOR", "XT-8760S", "B7B", BLIST_NOLUN}, /* guess what? */
+ {"MEDIAVIS", "RENO CD-ROMX2A", "2.03", BLIST_NOLUN}, /*Responds to all lun */
+ {"NEC", "CD-ROM DRIVE:841", "1.0", BLIST_NOLUN}, /* Locks-up when LUN>0 polled. */
+ {"PHILIPS", "PCA80SC", "V4-2", BLIST_NOLUN}, /* Responds to all lun */
+ {"RODIME", "RO3000S", "2.33", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"SANYO", "CRD-250S", "1.20", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1
+ * for aha152x controller, which causes
+ * SCSI code to reset bus.*/
+ {"SEAGATE", "ST157N", "\004|j", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1
+ * for aha152x controller, which causes
+ * SCSI code to reset bus.*/
+ {"SEAGATE", "ST296", "921", BLIST_NOLUN}, /* Responds to all lun */
+ {"SEAGATE", "ST1581", "6538", BLIST_NOLUN}, /* Responds to all lun */
+ {"SONY", "CD-ROM CDU-541", "4.3d", BLIST_NOLUN},
+ {"SONY", "CD-ROM CDU-55S", "1.0i", BLIST_NOLUN},
+ {"SONY", "CD-ROM CDU-561", "1.7x", BLIST_NOLUN},
+ {"SONY", "CD-ROM CDU-8012", "*", BLIST_NOLUN},
+ {"TANDBERG", "TDC 3600", "U07", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"TEAC", "CD-R55S", "1.0H", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"TEAC", "CD-ROM", "1.06", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1
+ * for seagate controller, which causes
+ * SCSI code to reset bus.*/
+ {"TEAC", "MT-2ST/45S2-27", "RV M", BLIST_NOLUN}, /* Responds to all lun */
+ {"TEXEL", "CD-ROM", "1.06", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1
+ * for seagate controller, which causes
+ * SCSI code to reset bus.*/
+ {"QUANTUM", "LPS525S", "3110", BLIST_NOLUN}, /* Locks sometimes if polled for lun != 0 */
+ {"QUANTUM", "PD1225S", "3110", BLIST_NOLUN}, /* Locks sometimes if polled for lun != 0 */
+ {"QUANTUM", "FIREBALL ST4.3S", "0F0C", BLIST_NOLUN}, /* Locks up when polled for lun != 0 */
+ {"MEDIAVIS", "CDR-H93MV", "1.31", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"SANKYO", "CP525", "6.64", BLIST_NOLUN}, /* causes failed REQ SENSE, extra reset */
+ {"HP", "C1750A", "3226", BLIST_NOLUN}, /* scanjet iic */
+ {"HP", "C1790A", "", BLIST_NOLUN}, /* scanjet iip */
+ {"HP", "C2500A", "", BLIST_NOLUN}, /* scanjet iicx */
+ {"HP", "A6188A", "*", BLIST_SPARSELUN}, /* HP Va7100 Array */
+ {"HP", "A6189A", "*", BLIST_SPARSELUN}, /* HP Va7400 Array */
+ {"HP", "A6189B", "*", BLIST_SPARSELUN}, /* HP Va7410 Array */
+ {"HP", "OPEN-", "*", BLIST_SPARSELUN}, /* HP XP Arrays */
+ {"YAMAHA", "CDR100", "1.00", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"YAMAHA", "CDR102", "1.00", BLIST_NOLUN}, /* Locks up if polled for lun != 0
+ * extra reset */
+ {"YAMAHA", "CRW8424S", "1.0", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"YAMAHA", "CRW6416S", "1.0c", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"MITSUMI", "CD-R CR-2201CS", "6119", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"RELISYS", "Scorpio", "*", BLIST_NOLUN}, /* responds to all LUN */
+ {"RELISYS", "VM3530+", "*", BLIST_NOLUN}, /* responds to all LUN */
+ {"ACROSS", "", "*", BLIST_NOLUN}, /* responds to all LUN */
+ {"MICROTEK", "ScanMaker II", "5.61", BLIST_NOLUN}, /* responds to all LUN */
+
+/*
+ * Other types of devices that have special flags.
+ */
+ {"SONY", "CD-ROM CDU-8001", "*", BLIST_BORKEN},
+ {"TEXEL", "CD-ROM", "1.06", BLIST_BORKEN},
+ {"IOMEGA", "Io20S *F", "*", BLIST_KEY},
+ {"INSITE", "Floptical F*8I", "*", BLIST_KEY},
+ {"INSITE", "I325VM", "*", BLIST_KEY},
+ {"LASOUND","CDX7405","3.10", BLIST_MAX5LUN | BLIST_SINGLELUN},
+ {"MICROP", "4110", "*", BLIST_NOTQ}, /* Buggy Tagged Queuing */
+ {"NRC", "MBR-7", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"NRC", "MBR-7.4", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"REGAL", "CDC-4X", "*", BLIST_MAX5LUN | BLIST_SINGLELUN},
+ {"NAKAMICH", "MJ-4.8S", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"NAKAMICH", "MJ-5.16S", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"PIONEER", "CD-ROM DRM-600", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"PIONEER", "CD-ROM DRM-602X", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"PIONEER", "CD-ROM DRM-604X", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"EMULEX", "MD21/S2 ESDI", "*", BLIST_SINGLELUN},
+ {"CANON", "IPUBJD", "*", BLIST_SPARSELUN},
+ {"nCipher", "Fastness Crypto", "*", BLIST_FORCELUN},
+ {"DEC","HSG80","*", BLIST_FORCELUN},
+ {"COMPAQ","LOGICAL VOLUME","*", BLIST_FORCELUN},
+ {"COMPAQ","CR3500","*", BLIST_FORCELUN},
+ {"NEC", "PD-1 ODX654P", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"MATSHITA", "PD-1", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"iomega", "jaz 1GB", "J.86", BLIST_NOTQ | BLIST_NOLUN},
+ {"TOSHIBA","CDROM","*", BLIST_ISROM},
+ {"TOSHIBA","CD-ROM","*", BLIST_ISROM},
+ {"MegaRAID", "LD", "*", BLIST_FORCELUN},
+ {"DGC", "RAID", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 650F (tgt @ LUN 0)
+ {"DGC", "DISK", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 650F (no tgt @ LUN 0)
+ {"DELL", "PV660F", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+ {"DELL", "PV660F PSEUDO", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+ {"DELL", "PSEUDO DEVICE .", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 530F
+ {"DELL", "PV530F", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 530F
+ {"EMC", "SYMMETRIX", "*", BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN},
+ {"HP", "A6189A", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // HP VA7400, by Alar Aun
+ {"CMD", "CRA-7280", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // CMD RAID Controller
+ {"CNSI", "G7324", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Chaparral G7324 RAID
+ {"CNSi", "G8324", "*", BLIST_SPARSELUN}, // Chaparral G8324 RAID
+ {"Zzyzx", "RocketStor 500S", "*", BLIST_SPARSELUN},
+ {"Zzyzx", "RocketStor 2000", "*", BLIST_SPARSELUN},
+ {"SONY", "TSL", "*", BLIST_FORCELUN}, // DDS3 & DDS4 autoloaders
+ {"DELL", "PERCRAID", "*", BLIST_FORCELUN},
+ {"HP", "NetRAID-4M", "*", BLIST_FORCELUN},
+ {"ADAPTEC", "AACRAID", "*", BLIST_FORCELUN},
+ {"ADAPTEC", "Adaptec 5400S", "*", BLIST_FORCELUN},
+ {"COMPAQ", "MSA1000", "*", BLIST_FORCELUN},
+ {"HP", "C1557A", "*", BLIST_FORCELUN},
+ {"IBM", "AuSaV1S2", "*", BLIST_FORCELUN},
+ {"FSC", "CentricStor", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+ {"DDN", "SAN DataDirector", "*", BLIST_SPARSELUN},
+ {"HITACHI", "DF400", "*", BLIST_SPARSELUN},
+ {"HITACHI", "DF500", "*", BLIST_SPARSELUN},
+ {"HITACHI", "DF600", "*", BLIST_SPARSELUN},
+
+ /*
+ * Must be at end of list...
+ */
+ {NULL, NULL, NULL}
+};
+
+#define MAX_SCSI_LUNS 0xFFFFFFFF
+
+#ifdef CONFIG_SCSI_MULTI_LUN
+static unsigned int max_scsi_luns = MAX_SCSI_LUNS;
+#else
+static unsigned int max_scsi_luns = 1;
+#endif
+
+#ifdef MODULE
+
+MODULE_PARM(max_scsi_luns, "i");
+MODULE_PARM_DESC(max_scsi_luns, "last scsi LUN (should be between 1 and 2^32-1)");
+
+#else
+
+static int __init scsi_luns_setup(char *str)
+{
+#if 0
+ unsigned int tmp;
+
+ if (get_option(&str, &tmp) == 1) {
+ max_scsi_luns = tmp;
+ return 1;
+ } else {
+ printk("scsi_luns_setup : usage max_scsi_luns=n "
+ "(n should be between 1 and 2^32-1)\n");
+ return 0;
+ }
+#else
+ return 0;
+#endif
+}
+
+__setup("max_scsi_luns=", scsi_luns_setup);
+
+#endif
+
+static void print_inquiry(unsigned char *data)
+{
+ int i;
+
+ printk(" Vendor: ");
+ for (i = 8; i < 16; i++) {
+ if (data[i] >= 0x20 && i < data[4] + 5)
+ printk("%c", data[i]);
+ else
+ printk(" ");
+ }
+
+ printk(" Model: ");
+ for (i = 16; i < 32; i++) {
+ if (data[i] >= 0x20 && i < data[4] + 5)
+ printk("%c", data[i]);
+ else
+ printk(" ");
+ }
+
+ printk(" Rev: ");
+ for (i = 32; i < 36; i++) {
+ if (data[i] >= 0x20 && i < data[4] + 5)
+ printk("%c", data[i]);
+ else
+ printk(" ");
+ }
+
+ printk("\n");
+
+ i = data[0] & 0x1f;
+
+ printk(" Type: %s ",
+ i < MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] : "Unknown ");
+ printk(" ANSI SCSI revision: %02x", data[2] & 0x07);
+ if ((data[2] & 0x07) == 1 && (data[3] & 0x0f) == 1)
+ printk(" CCS\n");
+ else
+ printk("\n");
+}
+
+static int get_device_flags(unsigned char *response_data)
+{
+ int i = 0;
+ unsigned char *pnt;
+ for (i = 0; 1; i++) {
+ if (device_list[i].vendor == NULL)
+ return 0;
+ pnt = &response_data[8];
+ while (*pnt && *pnt == ' ')
+ pnt++;
+ if (memcmp(device_list[i].vendor, pnt,
+ strlen(device_list[i].vendor)))
+ continue;
+ pnt = &response_data[16];
+ while (*pnt && *pnt == ' ')
+ pnt++;
+ if (memcmp(device_list[i].model, pnt,
+ strlen(device_list[i].model)))
+ continue;
+ return device_list[i].flags;
+ }
+ return 0;
+}
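+
+/*
+ * Illustrative note: the returned flags are tested bit by bit in the
+ * scan logic, e.g. scan_scsis() below treats a lun-0 device that
+ * reports BLIST_LARGELUN as SCSI 3 so that luns above 7 are still
+ * probed.
+ */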
+
+/*
+ * Detecting SCSI devices:
+ * We scan all present host adapters' buses, from ID 0 to ID (max_id).
+ * We use the INQUIRY command, determine device type, and pass the ID /
+ * lun address of all sequential devices to the tape driver, all random
+ * devices to the disk driver.
+ */
+void scan_scsis(struct Scsi_Host *shpnt,
+ uint hardcoded,
+ uint hchannel,
+ uint hid,
+ uint hlun)
+{
+ uint channel;
+ unsigned int dev;
+ unsigned int lun;
+ unsigned int max_dev_lun;
+ unsigned char *scsi_result;
+ unsigned char scsi_result0[256];
+ Scsi_Device *SDpnt;
+ Scsi_Device *SDtail;
+ unsigned int sparse_lun;
+ int lun0_sl;
+
+ scsi_result = NULL;
+
+ SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device),
+ GFP_ATOMIC);
+ if (SDpnt) {
+ memset(SDpnt, 0, sizeof(Scsi_Device));
+ /*
+ * Register the queue for the device. All I/O requests will
+ * come in through here. We also need to register a pointer to
+ * ourselves, since the queue handler won't know what device
+ * the queue actually represents. We could look it up, but it
+ * is pointless work.
+ */
+ scsi_initialize_queue(SDpnt, shpnt);
+ SDpnt->request_queue.queuedata = (void *) SDpnt;
+ /* Make sure we have something that is valid for DMA purposes */
+ scsi_result = ((!shpnt->unchecked_isa_dma)
+ ? &scsi_result0[0] : kmalloc(512, GFP_DMA));
+ }
+
+ if (scsi_result == NULL) {
+ printk("Unable to obtain scsi_result buffer\n");
+ goto leave;
+ }
+ /*
+ * We must chain ourself in the host_queue, so commands can time out
+ */
+ SDpnt->queue_depth = 1;
+ SDpnt->host = shpnt;
+ SDpnt->online = TRUE;
+
+ initialize_merge_fn(SDpnt);
+
+#if 0
+ /*
+ * Initialize the object that we will use to wait for command blocks.
+ */
+ init_waitqueue_head(&SDpnt->scpnt_wait);
+#endif
+
+ /*
+ * Next, hook the device to the host in question.
+ */
+ SDpnt->prev = NULL;
+ SDpnt->next = NULL;
+ if (shpnt->host_queue != NULL) {
+ SDtail = shpnt->host_queue;
+ while (SDtail->next != NULL)
+ SDtail = SDtail->next;
+
+ SDtail->next = SDpnt;
+ SDpnt->prev = SDtail;
+ } else {
+ shpnt->host_queue = SDpnt;
+ }
+
+ /*
+ * We need to increment the counter for this one device so we can track
+ * when things are quiet.
+ */
+ if (hardcoded == 1) {
+ Scsi_Device *oldSDpnt = SDpnt;
+ struct Scsi_Device_Template *sdtpnt;
+ channel = hchannel;
+ if (channel > shpnt->max_channel)
+ goto leave;
+ dev = hid;
+ if (dev >= shpnt->max_id)
+ goto leave;
+ lun = hlun;
+ if (lun >= shpnt->max_lun)
+ goto leave;
+ if ((0 == lun) || (lun > 7))
+ lun0_sl = SCSI_3; /* actually don't care for 0 == lun */
+ else
+ lun0_sl = find_lun0_scsi_level(channel, dev, shpnt);
+ scan_scsis_single(channel, dev, lun, lun0_sl, &max_dev_lun,
+ &sparse_lun, &SDpnt, shpnt, scsi_result);
+ if (SDpnt != oldSDpnt) {
+
+            /* it could happen that the block device hasn't been initialized yet */
+ /* queue_depth() moved from scsi_proc_info() so that
+ it is called before scsi_build_commandblocks() */
+ if (shpnt->select_queue_depths != NULL)
+ (shpnt->select_queue_depths)(shpnt,
+ shpnt->host_queue);
+
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next)
+ if (sdtpnt->init && sdtpnt->dev_noticed)
+ (*sdtpnt->init) ();
+
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+ if (sdtpnt->attach) {
+ (*sdtpnt->attach) (oldSDpnt);
+ if (oldSDpnt->attached) {
+ scsi_build_commandblocks(oldSDpnt);
+ if (0 == oldSDpnt->has_cmdblocks) {
+ printk("scan_scsis: DANGER, no command blocks\n");
+ /* What to do now ?? */
+ }
+ }
+ }
+ }
+ scsi_resize_dma_pool();
+
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+ if (sdtpnt->finish && sdtpnt->nr_dev) {
+ (*sdtpnt->finish) ();
+ }
+ }
+ }
+ } else {
+ /* Actual LUN. PC ordering is 0->n; IBM/spec ordering is n->0 */
+ int order_dev;
+
+ for (channel = 0; channel <= shpnt->max_channel; channel++) {
+ for (dev = 0; dev < shpnt->max_id; ++dev) {
+ if (shpnt->reverse_ordering)
+ /* Shift to scanning 15,14,13... or 7,6,5,4... */
+ order_dev = shpnt->max_id - dev - 1;
+ else
+ order_dev = dev;
+
+ if (shpnt->this_id != order_dev) {
+
+ /*
+ * We need the for loop so our continue, etc. work fine. We put the
+ * limit in a variable so that we can override it during the scan if
+ * we detect a device *KNOWN* to have multiple logical units.
+ */
+ max_dev_lun = (max_scsi_luns < shpnt->max_lun ?
+ max_scsi_luns : shpnt->max_lun);
+ sparse_lun = 0;
+ for (lun = 0, lun0_sl = SCSI_2; lun < max_dev_lun; ++lun) {
+ /* don't probe further for luns > 7 for targets <= SCSI_2 */
+ if ((lun0_sl < SCSI_3) && (lun > 7))
+ break;
+
+ if (!scan_scsis_single(channel, order_dev, lun, lun0_sl,
+ &max_dev_lun, &sparse_lun, &SDpnt, shpnt,
+ scsi_result)
+ && !sparse_lun)
+ break; /* break means don't probe further for luns!=0 */
+ if (SDpnt && (0 == lun)) {
+ int bflags = get_device_flags (scsi_result);
+ if (bflags & BLIST_LARGELUN)
+ lun0_sl = SCSI_3; /* treat as SCSI 3 */
+ else
+ lun0_sl = SDpnt->scsi_level;
+ }
+ } /* for lun ends */
+ } /* if this_id != id ends */
+ } /* for dev ends */
+ } /* for channel ends */
+ } /* if/else hardcoded */
+
+ leave:
+
+ { /* Unchain SDpnt from host_queue */
+ Scsi_Device *prev, *next;
+ Scsi_Device *dqptr;
+
+ for (dqptr = shpnt->host_queue; dqptr != SDpnt; dqptr = dqptr->next)
+ continue;
+ if (dqptr) {
+ prev = dqptr->prev;
+ next = dqptr->next;
+ if (prev)
+ prev->next = next;
+ else
+ shpnt->host_queue = next;
+ if (next)
+ next->prev = prev;
+ }
+ }
+
+ /* Last device block does not exist. Free memory. */
+ if (SDpnt != NULL) {
+ blk_cleanup_queue(&SDpnt->request_queue);
+ kfree((char *) SDpnt);
+ }
+
+ /* If we allocated a buffer so we could do DMA, free it now */
+ if (scsi_result != &scsi_result0[0] && scsi_result != NULL) {
+ kfree(scsi_result);
+ }
+
+ {
+ Scsi_Device *sdev;
+ Scsi_Cmnd *scmd;
+
+ SCSI_LOG_SCAN_BUS(4, printk("Host status for host %p:\n", shpnt));
+ for (sdev = shpnt->host_queue; sdev; sdev = sdev->next) {
+ SCSI_LOG_SCAN_BUS(4, printk("Device %d %p: ", sdev->id, sdev));
+ for (scmd = sdev->device_queue; scmd; scmd = scmd->next) {
+ SCSI_LOG_SCAN_BUS(4, printk("%p ", scmd));
+ }
+ SCSI_LOG_SCAN_BUS(4, printk("\n"));
+ }
+ }
+}
+
+/*
+ * The worker for scan_scsis.
+ * Returning 0 means don't probe further for lun != 0; 1 means OK, go on.
+ * Global variables used: scsi_devices (linked list)
+ */
+static int scan_scsis_single(unsigned int channel, unsigned int dev,
+ unsigned int lun, int lun0_scsi_level,
+ unsigned int *max_dev_lun, unsigned int *sparse_lun,
+ Scsi_Device ** SDpnt2, struct Scsi_Host *shpnt,
+ char *scsi_result)
+{
+ char devname[64];
+ unsigned char scsi_cmd[MAX_COMMAND_SIZE];
+ struct Scsi_Device_Template *sdtpnt;
+ Scsi_Device *SDtail, *SDpnt = *SDpnt2;
+ Scsi_Request * SRpnt;
+ int bflags, type = -1;
+#ifdef DEVFS_MUST_DIE
+ extern devfs_handle_t scsi_devfs_handle;
+#endif
+ int scsi_level;
+
+ SDpnt->host = shpnt;
+ SDpnt->id = dev;
+ SDpnt->lun = lun;
+ SDpnt->channel = channel;
+ SDpnt->online = TRUE;
+
+ scsi_build_commandblocks(SDpnt);
+
+ /* Some low level driver could use device->type (DB) */
+ SDpnt->type = -1;
+
+ /*
+ * Assume that the device will have handshaking problems, and then fix
+ * this field later if it turns out it doesn't
+ */
+ SDpnt->borken = 1;
+ SDpnt->was_reset = 0;
+ SDpnt->expecting_cc_ua = 0;
+ SDpnt->starved = 0;
+
+ if (NULL == (SRpnt = scsi_allocate_request(SDpnt))) {
+ printk("scan_scsis_single: no memory\n");
+ return 0;
+ }
+
+ /*
+ * We used to do a TEST_UNIT_READY before the INQUIRY but that was
+ * not really necessary. Spec recommends using INQUIRY to scan for
+ * devices (and TEST_UNIT_READY to poll for media change). - Paul G.
+ */
+
+ SCSI_LOG_SCAN_BUS(3, printk("scsi: performing INQUIRY\n"));
+ /*
+ * Build an INQUIRY command block.
+ */
+ scsi_cmd[0] = INQUIRY;
+ if ((lun > 0) && (lun0_scsi_level <= SCSI_2))
+ scsi_cmd[1] = (lun << 5) & 0xe0;
+ else
+ scsi_cmd[1] = 0; /* SCSI_3 and higher, don't touch */
+ scsi_cmd[2] = 0;
+ scsi_cmd[3] = 0;
+ scsi_cmd[4] = 255;
+ scsi_cmd[5] = 0;
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+
+ scsi_wait_req (SRpnt, (void *) scsi_cmd,
+ (void *) scsi_result,
+ 256, SCSI_TIMEOUT+4*HZ, 3);
+
+ SCSI_LOG_SCAN_BUS(3, printk("scsi: INQUIRY %s with code 0x%x\n",
+ SRpnt->sr_result ? "failed" : "successful", SRpnt->sr_result));
+
+ /*
+ * Now that we don't do TEST_UNIT_READY anymore, we must be prepared
+ * for media change conditions here, so we cannot require a zero result.
+ */
+ if (SRpnt->sr_result) {
+ if ((driver_byte(SRpnt->sr_result) & DRIVER_SENSE) != 0 &&
+ (SRpnt->sr_sense_buffer[2] & 0xf) == UNIT_ATTENTION &&
+ SRpnt->sr_sense_buffer[12] == 0x28 &&
+ SRpnt->sr_sense_buffer[13] == 0) {
+ /* not-ready to ready transition - good */
+ } else {
+ /* assume no peripheral if any other sort of error */
+ scsi_release_request(SRpnt);
+ return 0;
+ }
+ }
+
+ /*
+ * Check for SPARSELUN before checking the peripheral qualifier,
+ * so sparse lun devices are completely scanned.
+ */
+
+ /*
+ * Get any flags for this device.
+ */
+ bflags = get_device_flags (scsi_result);
+
+ if (bflags & BLIST_SPARSELUN) {
+ *sparse_lun = 1;
+ }
+ /*
+ * Check the peripheral qualifier field - this tells us whether LUNS
+ * are supported here or not.
+ */
+ if ((scsi_result[0] >> 5) == 3) {
+ scsi_release_request(SRpnt);
+ return 0; /* peripheral qualifier 3: no device at this LUN */
+ }
+ /* The Toshiba ROM was "gender-changed" here as an inline hack.
+ This is now much more generic.
+ This is a mess: What we really want is to leave the scsi_result
+ alone, and just change the SDpnt structure. And the SDpnt is what
+ we want print_inquiry to print. -- REW
+ */
+ if (bflags & BLIST_ISDISK) {
+ scsi_result[0] = TYPE_DISK;
+ scsi_result[1] |= 0x80; /* removable */
+ }
+
+ if (bflags & BLIST_ISROM) {
+ scsi_result[0] = TYPE_ROM;
+ scsi_result[1] |= 0x80; /* removable */
+ }
+
+ memcpy(SDpnt->vendor, scsi_result + 8, 8);
+ memcpy(SDpnt->model, scsi_result + 16, 16);
+ memcpy(SDpnt->rev, scsi_result + 32, 4);
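+
+ /*
+ * Editor's note, for reference: the SCSI-2 INQUIRY data consulted
+ * above and below is laid out as
+ * byte 0 peripheral qualifier (7-5) / device type (4-0)
+ * byte 1 bit 7 = RMB (removable medium)
+ * byte 2 bits 2-0 = ANSI version (scsi_level)
+ * byte 3 bits 3-0 = response data format
+ * byte 7 bit 1 = CmdQue (tagged queuing)
+ * bytes 8-15 vendor identification
+ * bytes 16-31 product identification
+ * bytes 32-35 product revision level
+ */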
+
+ SDpnt->removable = (0x80 & scsi_result[1]) >> 7;
+ /* Use the peripheral qualifier field to determine online/offline */
+ if (((scsi_result[0] >> 5) & 7) == 1) SDpnt->online = FALSE;
+ else SDpnt->online = TRUE;
+ SDpnt->lockable = SDpnt->removable;
+ SDpnt->changed = 0;
+ SDpnt->access_count = 0;
+ SDpnt->busy = 0;
+ SDpnt->has_cmdblocks = 0;
+ /*
+ * Currently, all sequential devices are assumed to be tapes and all
+ * random-access devices disks; ROM / WORM devices get the appropriate
+ * read-only flags and are treated as RO.
+ */
+ switch (type = (scsi_result[0] & 0x1f)) {
+ case TYPE_TAPE:
+ case TYPE_DISK:
+ case TYPE_PRINTER:
+ case TYPE_MOD:
+ case TYPE_PROCESSOR:
+ case TYPE_SCANNER:
+ case TYPE_MEDIUM_CHANGER:
+ case TYPE_ENCLOSURE:
+ case TYPE_COMM:
+ SDpnt->writeable = 1;
+ break;
+ case TYPE_WORM:
+ case TYPE_ROM:
+ SDpnt->writeable = 0;
+ break;
+ default:
+ printk("scsi: unknown type %d\n", type);
+ }
+
+ SDpnt->device_blocked = FALSE;
+ SDpnt->device_busy = 0;
+ SDpnt->single_lun = 0;
+ SDpnt->soft_reset =
+ (scsi_result[7] & 1) && ((scsi_result[3] & 7) == 2);
+ SDpnt->random = (type == TYPE_TAPE) ? 0 : 1;
+ SDpnt->type = (type & 0x1f);
+
+ print_inquiry(scsi_result);
+
+ sprintf (devname, "host%d/bus%d/target%d/lun%d",
+ SDpnt->host->host_no, SDpnt->channel, SDpnt->id, SDpnt->lun);
+#ifdef DEVFS_MUST_DIE
+ if (SDpnt->de) printk ("DEBUG: dir: \"%s\" already exists\n", devname);
+ else SDpnt->de = devfs_mk_dir (scsi_devfs_handle, devname, NULL);
+#endif
+
+ for (sdtpnt = scsi_devicelist; sdtpnt;
+ sdtpnt = sdtpnt->next)
+ if (sdtpnt->detect)
+ SDpnt->attached +=
+ (*sdtpnt->detect) (SDpnt);
+
+ SDpnt->scsi_level = scsi_result[2] & 0x07;
+ if (SDpnt->scsi_level >= 2 ||
+ (SDpnt->scsi_level == 1 &&
+ (scsi_result[3] & 0x0f) == 1))
+ SDpnt->scsi_level++;
+ scsi_level = SDpnt->scsi_level;
+
+ /*
+ * Accommodate drivers that want to sleep when they should be in a polling
+ * loop.
+ */
+ SDpnt->disconnect = 0;
+
+
+ /*
+ * Set the tagged_queue flag for SCSI-II devices that purport to support
+ * tagged queuing in the INQUIRY data.
+ */
+ SDpnt->tagged_queue = 0;
+ if ((SDpnt->scsi_level >= SCSI_2) &&
+ (scsi_result[7] & 2) &&
+ !(bflags & BLIST_NOTQ)) {
+ SDpnt->tagged_supported = 1;
+ SDpnt->current_tag = 0;
+ }
+ /*
+ * Some revisions of the Texel CD ROM drives have handshaking problems when
+ * used with the Seagate controllers. Before we know what type of device
+ * we're talking to, we assume it's borken and then change it here if it
+ * turns out that it isn't a TEXEL drive.
+ */
+ if ((bflags & BLIST_BORKEN) == 0)
+ SDpnt->borken = 0;
+
+ /*
+ * If we want to only allow I/O to one of the luns attached to this device
+ * at a time, then we set this flag.
+ */
+ if (bflags & BLIST_SINGLELUN)
+ SDpnt->single_lun = 1;
+
+ /*
+ * Some devices need this "key" to be unlocked before we can use them
+ */
+ if ((bflags & BLIST_KEY) != 0) {
+ printk("Unlocked floptical drive.\n");
+ SDpnt->lockable = 0;
+ scsi_cmd[0] = MODE_SENSE;
+ if (shpnt->max_lun <= 8)
+ scsi_cmd[1] = (lun << 5) & 0xe0;
+ else scsi_cmd[1] = 0; /* any other idea? */
+ scsi_cmd[2] = 0x2e;
+ scsi_cmd[3] = 0;
+ scsi_cmd[4] = 0x2a;
+ scsi_cmd[5] = 0;
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+ scsi_wait_req (SRpnt, (void *) scsi_cmd,
+ (void *) scsi_result, 0x2a,
+ SCSI_TIMEOUT, 3);
+ }
+
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+ scsi_release_commandblocks(SDpnt);
+
+ /*
+ * This device was already hooked up to the host in question,
+ * so at this point we just let go of it and it should be fine. We do need to
+ * allocate a new one and attach it to the host so that we can further scan the bus.
+ */
+ SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device), GFP_ATOMIC);
+ if (!SDpnt) {
+ printk("scsi: scan_scsis_single: Cannot malloc\n");
+ return 0;
+ }
+ memset(SDpnt, 0, sizeof(Scsi_Device));
+
+ *SDpnt2 = SDpnt;
+ SDpnt->queue_depth = 1;
+ SDpnt->host = shpnt;
+ SDpnt->online = TRUE;
+ SDpnt->scsi_level = scsi_level;
+
+ /*
+ * Register the queue for the device. All I/O requests will come
+ * in through here. We also need to register a pointer to
+ * ourselves, since the queue handler won't know what device
+ * the queue actually represents. We could look it up, but it
+ * is pointless work.
+ */
+ scsi_initialize_queue(SDpnt, shpnt);
+ SDpnt->host = shpnt;
+ initialize_merge_fn(SDpnt);
+
+ /*
+ * Mark this device as online, or otherwise we won't be able to do much with it.
+ */
+ SDpnt->online = TRUE;
+
+#if 0
+ /*
+ * Initialize the object that we will use to wait for command blocks.
+ */
+ init_waitqueue_head(&SDpnt->scpnt_wait);
+#endif
+
+ /*
+ * Since we just found one device, there had damn well better be one in the list
+ * already.
+ */
+ if (shpnt->host_queue == NULL)
+ panic("scan_scsis_single: Host queue == NULL\n");
+
+ SDtail = shpnt->host_queue;
+ while (SDtail->next) {
+ SDtail = SDtail->next;
+ }
+
+ /* Add this device to the linked list at the end */
+ SDtail->next = SDpnt;
+ SDpnt->prev = SDtail;
+ SDpnt->next = NULL;
+
+ /*
+ * Some scsi devices cannot be polled for lun != 0 due to firmware bugs
+ */
+ if (bflags & BLIST_NOLUN)
+ return 0; /* break; */
+
+ /*
+ * If this device is known to support sparse multiple units, override the
+ * other settings, and scan all of them.
+ */
+ if (bflags & BLIST_SPARSELUN) {
+ *max_dev_lun = shpnt->max_lun;
+ *sparse_lun = 1;
+ return 1;
+ }
+ /*
+ * If this device is known to support multiple units, override the other
+ * settings, and scan all of them.
+ */
+ if (bflags & BLIST_FORCELUN) {
+ /*
+ * Scanning MAX_SCSI_LUNS units would be a bad idea.
+ * Any better idea?
+ * I think we need REPORT LUNS in future to avoid scanning
+ * of unused LUNs. But, that is another item.
+ */
+ if (*max_dev_lun < shpnt->max_lun)
+ *max_dev_lun = shpnt->max_lun;
+ else if ((max_scsi_luns >> 1) >= *max_dev_lun)
+ *max_dev_lun += shpnt->max_lun;
+ else *max_dev_lun = max_scsi_luns;
+ return 1;
+ }
+ /*
+ * REGAL CDC-4X: avoid hang after LUN 4
+ */
+ if (bflags & BLIST_MAX5LUN) {
+ *max_dev_lun = 5;
+ return 1;
+ }
+
+ /*
+ * We assume the device can't handle lun != 0 if:
+ * - it reports SCSI-0 (ANSI SCSI Revision 0), as old drives like the
+ * MAXTOR XT-3280 do, or
+ * - it reports SCSI-1 (ANSI SCSI Revision 1) and Response Data Format 0.
+ */
+ if (((scsi_result[2] & 0x07) == 0)
+ ||
+ ((scsi_result[2] & 0x07) == 1 &&
+ (scsi_result[3] & 0x0f) == 0))
+ return 0;
+ return 1;
+}
+
+/*
+ * A helper for scan_scsis.
+ * Returns the scsi_level of lun0 on this host, channel and dev (if already
+ * known), otherwise returns SCSI_2.
+ */
+static int find_lun0_scsi_level(unsigned int channel, unsigned int dev,
+ struct Scsi_Host *shpnt)
+{
+ int res = SCSI_2;
+ Scsi_Device *SDpnt;
+
+ for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next)
+ {
+ if ((0 == SDpnt->lun) && (dev == SDpnt->id) &&
+ (channel == SDpnt->channel))
+ return (int)SDpnt->scsi_level;
+ }
+ /* haven't found lun0; we should send an INQUIRY, but take the easy route */
+ return res;
+}
diff --git a/xen/drivers/scsi/scsi_syms.c b/xen/drivers/scsi/scsi_syms.c
new file mode 100644
index 0000000000..82b23e84ce
--- /dev/null
+++ b/xen/drivers/scsi/scsi_syms.c
@@ -0,0 +1,105 @@
+/*
+ * We should not even be trying to compile this if we are not doing
+ * a module.
+ */
+#define __NO_VERSION__
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/* #include <xeno/string.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/ioport.h> */
+/* #include <xeno/kernel.h> */
+#include <xeno/blk.h>
+/* #include <xeno/fs.h> */
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include <scsi/scsi_ioctl.h>
+#include "hosts.h"
+#include "constants.h"
+
+#include "sd.h"
+#include <scsi/scsicam.h>
+
+/*
+ * This source file contains the symbol table used by scsi loadable
+ * modules.
+ */
+EXPORT_SYMBOL(scsi_register_module);
+EXPORT_SYMBOL(scsi_unregister_module);
+EXPORT_SYMBOL(scsi_free);
+EXPORT_SYMBOL(scsi_malloc);
+EXPORT_SYMBOL(scsi_register);
+EXPORT_SYMBOL(scsi_unregister);
+EXPORT_SYMBOL(scsicam_bios_param);
+EXPORT_SYMBOL(scsi_partsize);
+EXPORT_SYMBOL(scsi_allocate_device);
+EXPORT_SYMBOL(scsi_do_cmd);
+EXPORT_SYMBOL(scsi_command_size);
+EXPORT_SYMBOL(scsi_ioctl);
+EXPORT_SYMBOL(print_command);
+EXPORT_SYMBOL(print_sense);
+EXPORT_SYMBOL(print_req_sense);
+EXPORT_SYMBOL(print_msg);
+EXPORT_SYMBOL(print_status);
+EXPORT_SYMBOL(scsi_dma_free_sectors);
+EXPORT_SYMBOL(kernel_scsi_ioctl);
+EXPORT_SYMBOL(scsi_need_isa_buffer);
+EXPORT_SYMBOL(scsi_release_command);
+EXPORT_SYMBOL(print_Scsi_Cmnd);
+EXPORT_SYMBOL(scsi_block_when_processing_errors);
+EXPORT_SYMBOL(scsi_mark_host_reset);
+EXPORT_SYMBOL(scsi_ioctl_send_command);
+#if defined(CONFIG_SCSI_LOGGING) /* { */
+EXPORT_SYMBOL(scsi_logging_level);
+#endif
+
+EXPORT_SYMBOL(scsi_allocate_request);
+EXPORT_SYMBOL(scsi_release_request);
+EXPORT_SYMBOL(scsi_wait_req);
+EXPORT_SYMBOL(scsi_do_req);
+
+EXPORT_SYMBOL(scsi_report_bus_reset);
+EXPORT_SYMBOL(scsi_block_requests);
+EXPORT_SYMBOL(scsi_unblock_requests);
+
+EXPORT_SYMBOL(scsi_get_host_dev);
+EXPORT_SYMBOL(scsi_free_host_dev);
+
+EXPORT_SYMBOL(scsi_sleep);
+
+EXPORT_SYMBOL(proc_print_scsidevice);
+EXPORT_SYMBOL(proc_scsi);
+
+EXPORT_SYMBOL(scsi_io_completion);
+EXPORT_SYMBOL(scsi_end_request);
+
+EXPORT_SYMBOL(scsi_register_blocked_host);
+EXPORT_SYMBOL(scsi_deregister_blocked_host);
+
+/*
+ * This symbol is for the highlevel drivers (e.g. sg) only.
+ */
+EXPORT_SYMBOL(scsi_reset_provider);
+
+/*
+ * These are here only while I debug the rest of the scsi stuff.
+ */
+EXPORT_SYMBOL(scsi_hostlist);
+EXPORT_SYMBOL(scsi_hosts);
+EXPORT_SYMBOL(scsi_devicelist);
+EXPORT_SYMBOL(scsi_device_types);
+
+/*
+ * Externalize timers so that HBAs can safely start/restart commands.
+ */
+extern void scsi_add_timer(Scsi_Cmnd *, int, void ((*) (Scsi_Cmnd *)));
+extern int scsi_delete_timer(Scsi_Cmnd *);
+EXPORT_SYMBOL(scsi_add_timer);
+EXPORT_SYMBOL(scsi_delete_timer);
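+
+/*
+ * Editor's sketch (not part of this patch): a loadable low-level driver
+ * would typically reach these symbols as below. All "example_*" names
+ * are hypothetical; only scsi_register_module/scsi_unregister_module
+ * and MODULE_SCSI_HA come from the real interface.
+ */
+#if 0
+static Scsi_Host_Template example_template = {
+ name: "example", /* hypothetical adapter driver */
+ detect: example_detect, /* hypothetical detect routine */
+ release: example_release, /* hypothetical release routine */
+};
+
+static int __init example_init(void)
+{
+ example_template.module = THIS_MODULE;
+ return scsi_register_module(MODULE_SCSI_HA, &example_template);
+}
+
+static void __exit example_exit(void)
+{
+ scsi_unregister_module(MODULE_SCSI_HA, &example_template);
+}
+#endif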
diff --git a/xen/drivers/scsi/scsicam.c b/xen/drivers/scsi/scsicam.c
new file mode 100644
index 0000000000..ae13d27cdc
--- /dev/null
+++ b/xen/drivers/scsi/scsicam.c
@@ -0,0 +1,236 @@
+/*
+ * scsicam.c - SCSI CAM support functions, used for HDIO_GETGEO, etc.
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * For more information, please consult the SCSI-CAM draft.
+ */
+
+#define __NO_VERSION__
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+
+/*#include <linux/fs.h>*/
+/*#include <linux/genhd.h>*/
+#include <xeno/blk.h>
+/*#include <linux/kernel.h>*/
+#include <asm/unaligned.h>
+#include "scsi.h"
+#include "hosts.h"
+#include "sd.h"
+#include <scsi/scsicam.h>
+
+static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds,
+ unsigned int *secs);
+
+
+/*
+ * Function : int scsicam_bios_param (Disk *disk, kdev_t dev, int *ip)
+ *
+ * Purpose : to determine the BIOS mapping used for a drive in a
+ * SCSI-CAM system, storing the results in ip as required
+ * by the HDIO_GETGEO ioctl().
+ *
+ * Returns : -1 on failure, 0 on success.
+ *
+ */
+
+int scsicam_bios_param(Disk * disk, /* SCSI disk */
+ kdev_t dev, /* Device major, minor */
+ int *ip /* Heads, sectors, cylinders in that order */ )
+{
+ struct buffer_head *bh;
+ int ret_code;
+ int size = disk->capacity;
+ unsigned long temp_cyl;
+
+#if 0
+ if (!(bh = bread(MKDEV(MAJOR(dev), MINOR(dev)&~0xf), 0, block_size(dev))))
+ return -1;
+#else
+ bh = NULL;
+ printk("scsicam_bios_param: bread not avail!\n");
+ BUG();
+#endif
+
+ /* try to infer mapping from partition table */
+ ret_code = scsi_partsize(bh, (unsigned long) size, (unsigned int *) ip + 2,
+ (unsigned int *) ip + 0, (unsigned int *) ip + 1);
+#if 0
+ brelse(bh);
+#endif
+
+ if (ret_code == -1) {
+ /* pick some standard mapping with at most 1024 cylinders,
+ and at most 62 sectors per track - this works up to
+ 7905 MB */
+ ret_code = setsize((unsigned long) size, (unsigned int *) ip + 2,
+ (unsigned int *) ip + 0, (unsigned int *) ip + 1);
+ }
+ /* if something went wrong, then apparently we have to return
+ a geometry with more than 1024 cylinders */
+ if (ret_code || ip[0] > 255 || ip[1] > 63) {
+ ip[0] = 64;
+ ip[1] = 32;
+ temp_cyl = size / (ip[0] * ip[1]);
+ if (temp_cyl > 65534) {
+ ip[0] = 255;
+ ip[1] = 63;
+ }
+ ip[2] = size / (ip[0] * ip[1]);
+ }
+ return 0;
+}
+
+/*
+ * Function : int scsi_partsize(struct buffer_head *bh, unsigned long
+ * capacity,unsigned int *cyls, unsigned int *hds, unsigned int *secs);
+ *
+ * Purpose : to determine the BIOS mapping used to create the partition
+ * table, storing the results in *cyls, *hds, and *secs
+ *
+ * Returns : -1 on failure, 0 on success.
+ *
+ */
+
+int scsi_partsize(struct buffer_head *bh, unsigned long capacity,
+ unsigned int *cyls, unsigned int *hds, unsigned int *secs)
+{
+ struct partition *p, *largest = NULL;
+ int i, largest_cyl;
+ int cyl, ext_cyl, end_head, end_cyl, end_sector;
+ unsigned int logical_end, physical_end, ext_physical_end;
+
+
+ if (*(unsigned short *) (bh->b_data + 510) == 0xAA55) {
+ for (largest_cyl = -1, p = (struct partition *)
+ (0x1BE + bh->b_data), i = 0; i < 4; ++i, ++p) {
+ if (!p->sys_ind)
+ continue;
+#ifdef DEBUG
+ printk("scsicam_bios_param : partition %d has system \n",
+ i);
+#endif
+ cyl = p->cyl + ((p->sector & 0xc0) << 2);
+ if (cyl > largest_cyl) {
+ largest_cyl = cyl;
+ largest = p;
+ }
+ }
+ }
+ if (largest) {
+ end_cyl = largest->end_cyl + ((largest->end_sector & 0xc0) << 2);
+ end_head = largest->end_head;
+ end_sector = largest->end_sector & 0x3f;
+
+ if (end_head + 1 == 0 || end_sector == 0)
+ return -1;
+
+#ifdef DEBUG
+ printk("scsicam_bios_param : end at h = %d, c = %d, s = %d\n",
+ end_head, end_cyl, end_sector);
+#endif
+
+ physical_end = end_cyl * (end_head + 1) * end_sector +
+ end_head * end_sector + end_sector;
+
+ /* This is the actual _sector_ number at the end */
+ logical_end = get_unaligned(&largest->start_sect)
+ + get_unaligned(&largest->nr_sects);
+
+ /* This is for >1023 cylinders */
+ ext_cyl = (logical_end - (end_head * end_sector + end_sector))
+ / (end_head + 1) / end_sector;
+ ext_physical_end = ext_cyl * (end_head + 1) * end_sector +
+ end_head * end_sector + end_sector;
+
+#ifdef DEBUG
+ printk("scsicam_bios_param : logical_end=%d physical_end=%d ext_physical_end=%d ext_cyl=%d\n"
+ ,logical_end, physical_end, ext_physical_end, ext_cyl);
+#endif
+
+ if ((logical_end == physical_end) ||
+ (end_cyl == 1023 && ext_physical_end == logical_end)) {
+ *secs = end_sector;
+ *hds = end_head + 1;
+ *cyls = capacity / ((end_head + 1) * end_sector);
+ return 0;
+ }
+#ifdef DEBUG
+ printk("scsicam_bios_param : logical (%u) != physical (%u)\n",
+ logical_end, physical_end);
+#endif
+ }
+ return -1;
+}
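+
+/*
+ * Editor's worked example (assumed numbers): if the largest partition
+ * ends at cylinder 521, head 63, sector 32, then
+ * physical_end = 521*64*32 + 63*32 + 32 = 1069056;
+ * if start_sect + nr_sects of that partition is also 1069056, the
+ * BIOS mapping is inferred as secs = 32, hds = 64,
+ * cyls = capacity / (64 * 32).
+ */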
+
+/*
+ * Function : static int setsize(unsigned long capacity,unsigned int *cyls,
+ * unsigned int *hds, unsigned int *secs);
+ *
+ * Purpose : to determine a near-optimal int 0x13 mapping for a
+ * SCSI disk in terms of lost space of size capacity, storing
+ * the results in *cyls, *hds, and *secs.
+ *
+ * Returns : -1 on failure, 0 on success.
+ *
+ * Extracted from
+ *
+ * WORKING X3T9.2
+ * DRAFT 792D
+ *
+ *
+ * Revision 6
+ * 10-MAR-94
+ * Information technology -
+ * SCSI-2 Common access method
+ * transport and SCSI interface module
+ *
+ * ANNEX A :
+ *
+ * setsize() converts a read capacity value to int 13h
+ * head-cylinder-sector requirements. It minimizes the value for
+ * number of heads and maximizes the number of cylinders. This
+ * will support rather large disks before the number of heads
+ * will not fit in 4 bits (or 6 bits). This algorithm also
+ * minimizes the number of sectors that will be unused at the end
+ * of the disk while allowing for very large disks to be
+ * accommodated. This algorithm does not use physical geometry.
+ */
+
+static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds,
+ unsigned int *secs)
+{
+ unsigned int rv = 0;
+ unsigned long heads, sectors, cylinders, temp;
+
+ cylinders = 1024L; /* Set number of cylinders to max */
+ sectors = 62L; /* Maximize sectors per track */
+
+ temp = cylinders * sectors; /* Compute divisor for heads */
+ heads = capacity / temp; /* Compute value for number of heads */
+ if (capacity % temp) { /* If there is a remainder, */
+ heads++; /* increment number of heads */
+ temp = cylinders * heads; /* Compute divisor for sectors */
+ sectors = capacity / temp; /* Compute sectors per track */
+ if (capacity % temp) { /* If there is a remainder, */
+ sectors++; /* increment number of sectors */
+ temp = heads * sectors; /* Compute divisor for cylinders */
+ cylinders = capacity / temp; /* Compute number of cylinders */
+ }
+ }
+ if (cylinders == 0)
+ rv = (unsigned) -1; /* Give error if 0 cylinders */
+
+ *cyls = (unsigned int) cylinders; /* Stuff return values */
+ *secs = (unsigned int) sectors;
+ *hds = (unsigned int) heads;
+ return (rv);
+}
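+
+/*
+ * Editor's worked example (assumed capacity): for 2097152 512-byte
+ * sectors (1 GB), setsize() computes
+ * heads = 2097152 / (1024*62) = 33, remainder -> heads = 34;
+ * sectors = 2097152 / (1024*34) = 60, remainder -> sectors = 61;
+ * cylinders = 2097152 / (34*61) = 1011,
+ * i.e. C/H/S = 1011/34/61, leaving 2097152 - 1011*34*61 = 338
+ * sectors unused at the end of the disk.
+ */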
diff --git a/xen/drivers/scsi/sd.c b/xen/drivers/scsi/sd.c
new file mode 100644
index 0000000000..dbb69d2447
--- /dev/null
+++ b/xen/drivers/scsi/sd.c
@@ -0,0 +1,1512 @@
+/*
+ * sd.c Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale
+ *
+ * Linux scsi disk driver
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Modified by Eric Youngdale ericy@andante.org to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ *
+ * Modified by Eric Youngdale eric@andante.org to support loadable
+ * low-level scsi drivers.
+ *
+ * Modified by Jirka Hanika geo@ff.cuni.cz to support more
+ * scsi disks using eight major numbers.
+ *
+ * Modified by Richard Gooch rgooch@atnf.csiro.au to support devfs.
+ *
+ * Modified by Torben Mathiasen tmm@image.dk
+ * Resource allocation fixes in sd_init and cleanups.
+ *
+ * Modified by Alex Davis <letmein@erols.com>
+ * Fix problem where partition info not being read in sd_open.
+ *
+ * Modified by Alex Davis <letmein@erols.com>
+ * Fix problem where removable media could be ejected after sd_open.
+ */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+/* #include <xeno/fs.h> */
+/* #include <xeno/kernel.h> */
+#include <xeno/sched.h>
+/* #include <xeno/mm.h> */
+/* #include <xeno/string.h> */
+#include <xeno/hdreg.h>
+/* #include <xeno/errno.h> */
+/* #include <xeno/interrupt.h> */
+#include <xeno/init.h>
+
+/* #include <xeno/smp.h> */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#define MAJOR_NR SCSI_DISK0_MAJOR
+#include <xeno/blk.h>
+#include <xeno/blkpg.h>
+#include "scsi.h"
+#include "hosts.h"
+#include "sd.h"
+#include <scsi/scsi_ioctl.h>
+#include "constants.h"
+#include <scsi/scsicam.h> /* must follow "hosts.h" */
+
+#include <xeno/genhd.h>
+
+/*
+ * static const char RCSid[] = "$Header:";
+ */
+
+/* system major --> sd_gendisks index */
+#define SD_MAJOR_IDX(i) (MAJOR(i) & SD_MAJOR_MASK)
+/* sd_gendisks index --> system major */
+#define SD_MAJOR(i) (!(i) ? SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i))
+
+#define SD_PARTITION(dev) ((SD_MAJOR_IDX(dev) << 8) | (MINOR(dev) & 255))
+
+#define SCSI_DISKS_PER_MAJOR 16
+#define SD_MAJOR_NUMBER(i) SD_MAJOR((i) >> 8)
+#define SD_MINOR_NUMBER(i) ((i) & 255)
+#define MKDEV_SD_PARTITION(i) MKDEV(SD_MAJOR_NUMBER(i), (i) & 255)
+#define MKDEV_SD(index) MKDEV_SD_PARTITION((index) << 4)
+#define N_USED_SCSI_DISKS (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1)
+#define N_USED_SD_MAJORS (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR)
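+
+/*
+ * Editor's worked example: with the standard Linux majors
+ * (SCSI_DISK0_MAJOR = 8, SCSI_DISK1_MAJOR..SCSI_DISK7_MAJOR = 65..71,
+ * assumed here), disk 0 ("sda") is dev (8,0) and its third partition
+ * is (8,3); disk 16 is the first disk of the second major, so
+ * MKDEV_SD(16) == MKDEV(SD_MAJOR(1), 0) == (65,0).
+ */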
+
+#define MAX_RETRIES 5
+
+/*
+ * Timeouts (in jiffies) for disks (30 s) and Magneto-opticals (75 s, slower).
+ */
+
+#define SD_TIMEOUT (30 * HZ)
+#define SD_MOD_TIMEOUT (75 * HZ)
+
+static Scsi_Disk *rscsi_disks;
+static struct gendisk *sd_gendisks;
+static int *sd_sizes;
+static int *sd_blocksizes;
+static int *sd_hardsizes; /* Hardware sector size */
+static int *sd_max_sectors;
+
+static int check_scsidisk_media_change(kdev_t);
+static int fop_revalidate_scsidisk(kdev_t);
+
+static int sd_init_onedisk(int);
+
+
+static int sd_init(void);
+static void sd_finish(void);
+static int sd_attach(Scsi_Device *);
+static int sd_detect(Scsi_Device *);
+static void sd_detach(Scsi_Device *);
+static int sd_init_command(Scsi_Cmnd *);
+
+static struct Scsi_Device_Template sd_template = {
+ name:"disk",
+ tag:"sd",
+ scsi_type:TYPE_DISK,
+ major:SCSI_DISK0_MAJOR,
+ /*
+ * Secondary range of majors that this driver handles.
+ */
+ min_major:SCSI_DISK1_MAJOR,
+ max_major:SCSI_DISK7_MAJOR,
+ blk:1,
+ detect:sd_detect,
+ init:sd_init,
+ finish:sd_finish,
+ attach:sd_attach,
+ detach:sd_detach,
+ init_command:sd_init_command,
+};
+
+
+static void rw_intr(Scsi_Cmnd * SCpnt);
+
+#if defined(CONFIG_PPC)
+/*
+ * Moved from arch/ppc/pmac_setup.c. This is where it really belongs.
+ */
+kdev_t __init
+sd_find_target(void *host, int tgt)
+{
+ Scsi_Disk *dp;
+ int i;
+ for (dp = rscsi_disks, i = 0; i < sd_template.dev_max; ++i, ++dp)
+ if (dp->device != NULL && dp->device->host == host
+ && dp->device->id == tgt)
+ return MKDEV_SD(i);
+ return 0;
+}
+#endif
+
+static int sd_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+ kdev_t dev = inode->i_rdev;
+ struct Scsi_Host * host;
+ Scsi_Device * SDev;
+ int diskinfo[4];
+
+ SDev = rscsi_disks[DEVICE_NR(dev)].device;
+ if (!SDev)
+ return -ENODEV;
+
+ /*
+ * If we are in the middle of error recovery, don't let anyone
+ * else try to use this device. Also, if error recovery fails, it
+ * may try to take the device offline, in which case all further
+ * access to the device is prohibited.
+ */
+
+ if( !scsi_block_when_processing_errors(SDev) )
+ {
+ return -ENODEV;
+ }
+
+ switch (cmd)
+ {
+ case HDIO_GETGEO: /* Return BIOS disk parameters */
+ {
+ struct hd_geometry *loc = (struct hd_geometry *) arg;
+ if(!loc)
+ return -EINVAL;
+
+ host = rscsi_disks[DEVICE_NR(dev)].device->host;
+
+ /* default to most commonly used values */
+
+ diskinfo[0] = 0x40;
+ diskinfo[1] = 0x20;
+ diskinfo[2] =
+ rscsi_disks[DEVICE_NR(dev)].capacity >> 11;
+
+ /* override with calculated, extended default,
+ or driver values */
+
+ if(host->hostt->bios_param != NULL)
+ host->hostt->bios_param(
+ &rscsi_disks[DEVICE_NR(dev)], dev,
+ &diskinfo[0]);
+ else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)],
+ dev, &diskinfo[0]);
+
+ if (put_user(diskinfo[0], &loc->heads) ||
+ put_user(diskinfo[1], &loc->sectors) ||
+ put_user(diskinfo[2], &loc->cylinders) ||
+ put_user(sd_gendisks[SD_MAJOR_IDX(
+ inode->i_rdev)].part[MINOR(
+ inode->i_rdev)].start_sect, &loc->start))
+ return -EFAULT;
+ return 0;
+ }
+ case HDIO_GETGEO_BIG:
+ {
+ struct hd_big_geometry *loc =
+ (struct hd_big_geometry *) arg;
+
+ if(!loc)
+ return -EINVAL;
+
+ host = rscsi_disks[DEVICE_NR(dev)].device->host;
+
+ /* default to most commonly used values */
+
+ diskinfo[0] = 0x40;
+ diskinfo[1] = 0x20;
+ diskinfo[2] =
+ rscsi_disks[DEVICE_NR(dev)].capacity >> 11;
+
+ /* override with calculated, extended default,
+ or driver values */
+
+ if(host->hostt->bios_param != NULL)
+ host->hostt->bios_param(
+ &rscsi_disks[DEVICE_NR(dev)], dev,
+ &diskinfo[0]);
+ else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)],
+ dev, &diskinfo[0]);
+
+ if (put_user(diskinfo[0], &loc->heads) ||
+ put_user(diskinfo[1], &loc->sectors) ||
+ put_user(diskinfo[2],
+ (unsigned int *) &loc->cylinders) ||
+ put_user(sd_gendisks[SD_MAJOR_IDX(
+ inode->i_rdev)].part[MINOR(
+ inode->i_rdev)].start_sect, &loc->start))
+ return -EFAULT;
+ return 0;
+ }
+#if 0
+ case BLKGETSIZE:
+ case BLKGETSIZE64:
+ case BLKROSET:
+ case BLKROGET:
+ case BLKRASET:
+ case BLKRAGET:
+ case BLKFLSBUF:
+ case BLKSSZGET:
+ case BLKPG:
+ case BLKELVGET:
+ case BLKELVSET:
+ case BLKBSZGET:
+ case BLKBSZSET:
+ return blk_ioctl(inode->i_rdev, cmd, arg);
+
+ case BLKRRPART: /* Re-read partition tables */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ return revalidate_scsidisk(dev, 1);
+#endif
+
+ default:
+ return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device,
+ cmd, (void *) arg);
+ }
+}
+
+static void sd_devname(unsigned int disknum, char *buffer)
+{
+ if (disknum < 26)
+ sprintf(buffer, "sd%c", 'a' + disknum);
+ else {
+ unsigned int min1;
+ unsigned int min2;
+ /*
+ * For larger numbers of disks, we need to go to a new
+ * naming scheme.
+ */
+ min1 = disknum / 26;
+ min2 = disknum % 26;
+ sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2);
+ }
+}
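+
+/*
+ * Editor's worked example: sd_devname(0, buf) yields "sda",
+ * sd_devname(25, buf) "sdz", sd_devname(26, buf) "sdaa"
+ * (min1 = 1, min2 = 0), and sd_devname(27, buf) "sdab".
+ */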
+
+static request_queue_t *sd_find_queue(kdev_t dev)
+{
+ Scsi_Disk *dpnt;
+ int target;
+ target = DEVICE_NR(dev);
+
+ dpnt = &rscsi_disks[target];
+ if (!dpnt->device)
+ return NULL; /* No such device */
+ return &dpnt->device->request_queue;
+}
+
+static int sd_init_command(Scsi_Cmnd * SCpnt)
+{
+ int dev, block, this_count;
+ struct hd_struct *ppnt;
+ Scsi_Disk *dpnt;
+#if CONFIG_SCSI_LOGGING
+ char nbuff[6];
+#endif
+
+ ppnt = &sd_gendisks[SD_MAJOR_IDX(SCpnt->request.rq_dev)].part[MINOR(SCpnt->request.rq_dev)];
+ dev = DEVICE_NR(SCpnt->request.rq_dev);
+
+ block = SCpnt->request.sector;
+ this_count = SCpnt->request_bufflen >> 9;
+
+ SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = 0x%x, block = %d\n",
+ SCpnt->request.rq_dev, block));
+
+ dpnt = &rscsi_disks[dev];
+ if (dev >= sd_template.dev_max ||
+ !dpnt->device ||
+ !dpnt->device->online ||
+ block + SCpnt->request.nr_sectors > ppnt->nr_sects) {
+ SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n",
+ SCpnt->request.nr_sectors));
+ SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt));
+ return 0;
+ }
+ block += ppnt->start_sect;
+ if (dpnt->device->changed) {
+ /*
+ * quietly refuse to do anything to a changed disc until the changed
+ * bit has been reset
+ */
+ /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */
+ return 0;
+ }
+ SCSI_LOG_HLQUEUE(2, sd_devname(dev, nbuff));
+ SCSI_LOG_HLQUEUE(2, printk("%s : real dev = /dev/%d, block = %d\n",
+ nbuff, dev, block));
+
+ /*
+ * If we have a 1K hardware sectorsize, prevent access to single
+ * 512 byte sectors. In theory we could handle this - in fact
+ * the scsi cdrom driver must be able to handle this because
+ * we typically use 1K blocksizes, and cdroms typically have
+ * 2K hardware sectorsizes. Of course, things are simpler
+ * with the cdrom, since it is read-only. For performance
+ * reasons, the filesystems should be able to handle this
+ * and not force the scsi disk driver to use bounce buffers
+ * for this.
+ */
+ if (dpnt->device->sector_size == 1024) {
+ if ((block & 1) || (SCpnt->request.nr_sectors & 1)) {
+ printk("sd.c:Bad block number requested");
+ return 0;
+ } else {
+ block = block >> 1;
+ this_count = this_count >> 1;
+ }
+ }
+ if (dpnt->device->sector_size == 2048) {
+ if ((block & 3) || (SCpnt->request.nr_sectors & 3)) {
+ printk("sd.c:Bad block number requested");
+ return 0;
+ } else {
+ block = block >> 2;
+ this_count = this_count >> 2;
+ }
+ }
+ if (dpnt->device->sector_size == 4096) {
+ if ((block & 7) || (SCpnt->request.nr_sectors & 7)) {
+ printk("sd.c:Bad block number requested");
+ return 0;
+ } else {
+ block = block >> 3;
+ this_count = this_count >> 3;
+ }
+ }
+ switch (SCpnt->request.cmd) {
+ case WRITE:
+ if (!dpnt->device->writeable) {
+ return 0;
+ }
+ SCpnt->cmnd[0] = WRITE_6;
+ SCpnt->sc_data_direction = SCSI_DATA_WRITE;
+ break;
+ case READ:
+ SCpnt->cmnd[0] = READ_6;
+ SCpnt->sc_data_direction = SCSI_DATA_READ;
+ break;
+ default:
+ panic("Unknown sd command %d\n", SCpnt->request.cmd);
+ }
+
+ SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n", nbuff,
+ (SCpnt->request.cmd == WRITE) ? "writing" :
+ "reading", this_count,
+ SCpnt->request.nr_sectors));
+
+ SCpnt->cmnd[1] = (SCpnt->device->scsi_level <= SCSI_2) ?
+ ((SCpnt->lun << 5) & 0xe0) : 0;
+
+ if (((this_count > 0xff) || (block > 0x1fffff)) || SCpnt->device->ten) {
+ if (this_count > 0xffff)
+ this_count = 0xffff;
+
+ SCpnt->cmnd[0] += READ_10 - READ_6;
+ SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
+ SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
+ SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
+ SCpnt->cmnd[5] = (unsigned char) block & 0xff;
+ SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0;
+ SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
+ SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
+ } else {
+ if (this_count > 0xff)
+ this_count = 0xff;
+
+ SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f);
+ SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff);
+ SCpnt->cmnd[3] = (unsigned char) block & 0xff;
+ SCpnt->cmnd[4] = (unsigned char) this_count;
+ SCpnt->cmnd[5] = 0;
+ }
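+
+ /*
+ * Editor's note, for reference: the 6-byte CDB built above is
+ * opcode, LBA[20:16] (low 5 bits of byte 1; on SCSI <= 2 the high
+ * 3 bits carry the LUN), LBA[15:8], LBA[7:0], transfer length
+ * (one byte, hence the 0xff clamp), control. The 10-byte form is
+ * opcode, flags/LUN, LBA[31:24]..LBA[7:0] in bytes 2-5, reserved,
+ * length[15:8], length[7:0], control (hence the 0xffff clamp).
+ */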
+
+ /*
+ * We shouldn't disconnect in the middle of a sector, so with a dumb
+ * host adapter, it's safe to assume that we can at least transfer
+ * this many bytes between each connect / disconnect.
+ */
+ SCpnt->transfersize = dpnt->device->sector_size;
+ SCpnt->underflow = this_count << 9;
+
+ SCpnt->allowed = MAX_RETRIES;
+ SCpnt->timeout_per_command = (SCpnt->device->type == TYPE_DISK ?
+ SD_TIMEOUT : SD_MOD_TIMEOUT);
+
+ /*
+ * This is the completion routine we use. It is matched in
+ * capability to this function.
+ */
+ SCpnt->done = rw_intr;
+
+ /*
+ * This indicates that the command is ready from our end to be
+ * queued.
+ */
+ return 1;
+}
+
+static int sd_open(struct inode *inode, struct file *filp)
+{
+ int target, retval = -ENXIO;
+ Scsi_Device * SDev;
+ target = DEVICE_NR(inode->i_rdev);
+
+ SCSI_LOG_HLQUEUE(1, printk("target=%d, max=%d\n", target, sd_template.dev_max));
+
+ if (target >= sd_template.dev_max || !rscsi_disks[target].device)
+ return -ENXIO; /* No such device */
+
+ /*
+ * If the device is in error recovery, wait until it is done.
+ * If the device is offline, then disallow any access to it.
+ */
+ if (!scsi_block_when_processing_errors(rscsi_disks[target].device)) {
+ return -ENXIO;
+ }
+ /*
+ * Make sure that only one process can do a check_disk_change at a time.
+ * This is also used to lock out further access when the partition table
+ * is being re-read.
+ */
+
+ while (rscsi_disks[target].device->busy) {
+ barrier();
+ cpu_relax();
+ }
+ /*
+ * The following code can sleep.
+ * Module unloading must be prevented
+ */
+ SDev = rscsi_disks[target].device;
+ if (SDev->host->hostt->module)
+ __MOD_INC_USE_COUNT(SDev->host->hostt->module);
+ if (sd_template.module)
+ __MOD_INC_USE_COUNT(sd_template.module);
+ SDev->access_count++;
+
+#if 0
+ if (rscsi_disks[target].device->removable) {
+ SDev->allow_revalidate = 1;
+ check_disk_change(inode->i_rdev);
+ SDev->allow_revalidate = 0;
+
+
+ /*
+ * If the drive is empty, just let the open fail.
+ */
+ if ((!rscsi_disks[target].ready) && !(filp->f_flags & O_NDELAY)) {
+ retval = -ENOMEDIUM;
+ goto error_out;
+ }
+
+ /*
+ * Similarly, if the device has the write protect tab set,
+ * have the open fail if the user expects to be able to write
+ * to the thing.
+ */
+ if ((rscsi_disks[target].write_prot) && (filp->f_mode & 2)) {
+ retval = -EROFS;
+ goto error_out;
+ }
+ }
+#endif
+
+ /*
+ * It is possible that the disk changing stuff resulted in the device
+ * being taken offline. If this is the case, report this to the user,
+ * and don't pretend that
+ * the open actually succeeded.
+ */
+ if (!SDev->online) {
+ goto error_out;
+ }
+ /*
+ * See if we are requesting a non-existent partition. Do this
+ * after checking for disk change.
+ */
+ if (sd_sizes[SD_PARTITION(inode->i_rdev)] == 0) {
+ goto error_out;
+ }
+
+ if (SDev->removable)
+ if (SDev->access_count==1)
+ if (scsi_block_when_processing_errors(SDev))
+ scsi_ioctl(SDev, SCSI_IOCTL_DOORLOCK, NULL);
+
+
+ return 0;
+
+ error_out:
+ SDev->access_count--;
+ if (SDev->host->hostt->module)
+ __MOD_DEC_USE_COUNT(SDev->host->hostt->module);
+ if (sd_template.module)
+ __MOD_DEC_USE_COUNT(sd_template.module);
+ return retval;
+}
+
+static int sd_release(struct inode *inode, struct file *file)
+{
+ int target;
+ Scsi_Device * SDev;
+
+ target = DEVICE_NR(inode->i_rdev);
+ SDev = rscsi_disks[target].device;
+ if (!SDev)
+ return -ENODEV;
+
+ SDev->access_count--;
+
+ if (SDev->removable) {
+ if (!SDev->access_count)
+ if (scsi_block_when_processing_errors(SDev))
+ scsi_ioctl(SDev, SCSI_IOCTL_DOORUNLOCK, NULL);
+ }
+ if (SDev->host->hostt->module)
+ __MOD_DEC_USE_COUNT(SDev->host->hostt->module);
+ if (sd_template.module)
+ __MOD_DEC_USE_COUNT(sd_template.module);
+ return 0;
+}
+
+static struct block_device_operations sd_fops =
+{
+/* owner: THIS_MODULE, */
+ open: sd_open,
+ release: sd_release,
+ ioctl: sd_ioctl,
+ check_media_change: check_scsidisk_media_change,
+ revalidate: fop_revalidate_scsidisk
+};
+
+/*
+ * If we need more than one SCSI disk major (i.e. more than
+ * 16 SCSI disks), we'll have to kmalloc() more gendisks later.
+ */
+
+static struct gendisk sd_gendisk =
+{
+ major: SCSI_DISK0_MAJOR,
+ major_name: "sd",
+ minor_shift: 4,
+ max_p: 1 << 4,
+ fops: &sd_fops,
+};
+
+#define SD_GENDISK(i) sd_gendisks[(i) / SCSI_DISKS_PER_MAJOR]
+
+/*
+ * rw_intr is the interrupt routine for the device driver.
+ * It will be notified on the end of a SCSI read / write, and
+ * will take one of several actions based on success or failure.
+ */
+
+static void rw_intr(Scsi_Cmnd * SCpnt)
+{
+ int result = SCpnt->result;
+#if CONFIG_SCSI_LOGGING
+ char nbuff[6];
+#endif
+ int this_count = SCpnt->bufflen >> 9;
+ int good_sectors = (result == 0 ? this_count : 0);
+ int block_sectors = 1;
+ long error_sector;
+
+ SCSI_LOG_HLCOMPLETE(1, sd_devname(DEVICE_NR(SCpnt->request.rq_dev),
+ nbuff));
+
+ SCSI_LOG_HLCOMPLETE(1, printk("%s : rw_intr(%d, %x [%x %x])\n", nbuff,
+ SCpnt->host->host_no,
+ result,
+ SCpnt->sense_buffer[0],
+ SCpnt->sense_buffer[2]));
+
+ /*
+ Handle MEDIUM ERRORs that indicate partial success. Since this is a
+ relatively rare error condition, no care is taken to avoid
+ otherwise-avoidable work such as extra memcpy's.
+ */
+
+ if (driver_byte(result) != 0 && /* An error occurred */
+ SCpnt->sense_buffer[0] == 0xF0) { /* Sense data is valid */
+ switch (SCpnt->sense_buffer[2]) {
+ case MEDIUM_ERROR:
+ error_sector = (SCpnt->sense_buffer[3] << 24) |
+ (SCpnt->sense_buffer[4] << 16) |
+ (SCpnt->sense_buffer[5] << 8) |
+ SCpnt->sense_buffer[6];
+ if (SCpnt->request.bh != NULL)
+ block_sectors = SCpnt->request.bh->b_size >> 9;
+ switch (SCpnt->device->sector_size) {
+ case 1024:
+ error_sector <<= 1;
+ if (block_sectors < 2)
+ block_sectors = 2;
+ break;
+ case 2048:
+ error_sector <<= 2;
+ if (block_sectors < 4)
+ block_sectors = 4;
+ break;
+ case 4096:
+ error_sector <<=3;
+ if (block_sectors < 8)
+ block_sectors = 8;
+ break;
+ case 256:
+ error_sector >>= 1;
+ break;
+ default:
+ break;
+ }
+ error_sector -= sd_gendisks[SD_MAJOR_IDX(
+ SCpnt->request.rq_dev)].part[MINOR(
+ SCpnt->request.rq_dev)].start_sect;
+ error_sector &= ~(block_sectors - 1);
+ good_sectors = error_sector - SCpnt->request.sector;
+ if (good_sectors < 0 || good_sectors >= this_count)
+ good_sectors = 0;
+ break;
+
+ case RECOVERED_ERROR:
+ /*
+ * An error occurred, but it recovered. Inform the
+ * user, but make sure that it's not treated as a
+ * hard error.
+ */
+ print_sense("sd", SCpnt);
+ result = 0;
+ SCpnt->sense_buffer[0] = 0x0;
+ good_sectors = this_count;
+ break;
+
+ case ILLEGAL_REQUEST:
+ if (SCpnt->device->ten == 1) {
+ if (SCpnt->cmnd[0] == READ_10 ||
+ SCpnt->cmnd[0] == WRITE_10)
+ SCpnt->device->ten = 0;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ /*
+ * This calls the generic completion function, now that we know
+ * how many actual sectors finished, and how many sectors we need
+ * to say have failed.
+ */
+ scsi_io_completion(SCpnt, good_sectors, block_sectors);
+}
+/*
+ * sd_init_command() above is effectively the request handler for the sd
+ * driver: its function in life is to take block device requests and
+ * translate them into SCSI commands (the old requeue_sd_request() is gone).
+ */
+
+
+static int check_scsidisk_media_change(kdev_t full_dev)
+{
+ int retval;
+ int target;
+ int flag = 0;
+ Scsi_Device * SDev;
+
+ target = DEVICE_NR(full_dev);
+ SDev = rscsi_disks[target].device;
+
+ if (target >= sd_template.dev_max || !SDev) {
+ printk("SCSI disk request error: invalid device.\n");
+ return 0;
+ }
+ if (!SDev->removable)
+ return 0;
+
+ /*
+ * If the device is offline, don't send any commands - just pretend as
+ * if the command failed. If the device ever comes back online, we
+ * can deal with it then. It is only because of unrecoverable errors
+ * that we would ever take a device offline in the first place.
+ */
+ if (SDev->online == FALSE) {
+ rscsi_disks[target].ready = 0;
+ SDev->changed = 1;
+ return 1; /* This will force a flush, if called from
+ * check_disk_change */
+ }
+
+ /* Using Start/Stop enables differentiation between a drive with
+ * no cartridge loaded - NOT READY, a drive with a changed cartridge -
+ * UNIT ATTENTION, and one with the same cartridge - GOOD STATUS.
+ * This also handles drives that auto spin down, e.g. an Iomega Jaz
+ * 1GB, as this will spin up the drive.
+ */
+ retval = -ENODEV;
+ if (scsi_block_when_processing_errors(SDev))
+ retval = scsi_ioctl(SDev, SCSI_IOCTL_START_UNIT, NULL);
+
+ if (retval) { /* Unable to test, unit probably not ready.
+ * This usually means there is no disc in the
+ * drive. Mark as changed, and we will figure
+ * it out later once the drive is available
+ * again. */
+
+ rscsi_disks[target].ready = 0;
+ SDev->changed = 1;
+ return 1; /* This will force a flush, if called from
+ * check_disk_change */
+ }
+ /*
+ * For removable scsi disks (FLOPTICAL) we have to recognise the
+ * presence of a disk in the drive. This is kept in the Scsi_Disk
+ * struct and tested at open! Daniel Roche ( dan@lectra.fr )
+ */
+
+ rscsi_disks[target].ready = 1; /* FLOPTICAL */
+
+ retval = SDev->changed;
+ if (!flag)
+ SDev->changed = 0;
+ return retval;
+}
+
+static int sd_init_onedisk(int i)
+{
+ unsigned char cmd[10];
+ char nbuff[6];
+ unsigned char *buffer;
+ unsigned long spintime_value = 0;
+ int the_result, retries, spintime;
+ int sector_size;
+ Scsi_Request *SRpnt;
+
+ /*
+ * Get the name of the disk, in case we need to log it somewhere.
+ */
+ sd_devname(i, nbuff);
+
+ /*
+ * If the device is offline, don't try to read the capacity or any
+ * of the other niceties.
+ */
+ if (rscsi_disks[i].device->online == FALSE)
+ return i;
+
+ /*
+ * We need to retry the READ_CAPACITY because a UNIT_ATTENTION is
+ * considered a fatal error, and many devices report such an error
+ * just after a scsi bus reset.
+ */
+
+ SRpnt = scsi_allocate_request(rscsi_disks[i].device);
+ if (!SRpnt) {
+ printk(KERN_WARNING
+ "(sd_init_onedisk:) Request allocation failure.\n");
+ return i;
+ }
+
+ buffer = (unsigned char *) scsi_malloc(512);
+ if (!buffer) {
+ printk(KERN_WARNING "(sd_init_onedisk:) Memory allocation failure.\n");
+ scsi_release_request(SRpnt);
+ return i;
+ }
+
+ spintime = 0;
+
+ /* Spin up drives, as required. This is needed both at boot time
+ and on module load. */
+ do {
+ retries = 0;
+
+ while (retries < 3) {
+ cmd[0] = TEST_UNIT_READY;
+ cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+ ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+ memset((void *) &cmd[2], 0, 8);
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_sense_buffer[0] = 0;
+ SRpnt->sr_sense_buffer[2] = 0;
+ SRpnt->sr_data_direction = SCSI_DATA_NONE;
+
+ scsi_wait_req (SRpnt, (void *) cmd, (void *) buffer,
+ 0/*512*/, SD_TIMEOUT, MAX_RETRIES);
+
+ the_result = SRpnt->sr_result;
+ retries++;
+ if (the_result == 0
+ || SRpnt->sr_sense_buffer[2] != UNIT_ATTENTION)
+ break;
+ }
+
+ /*
+ * If the drive has indicated to us that it doesn't have
+ * any media in it, don't bother with any of the rest of
+ * this crap.
+ */
+ if( the_result != 0
+ && ((driver_byte(the_result) & DRIVER_SENSE) != 0)
+ && SRpnt->sr_sense_buffer[2] == UNIT_ATTENTION
+ && SRpnt->sr_sense_buffer[12] == 0x3A ) {
+ rscsi_disks[i].capacity = 0x1fffff;
+ sector_size = 512;
+ rscsi_disks[i].device->changed = 1;
+ rscsi_disks[i].ready = 0;
+ break;
+ }
+
+ /* Look for non-removable devices that return NOT_READY.
+ * Issue command to spin up drive for these cases. */
+ if (the_result && !rscsi_disks[i].device->removable &&
+ SRpnt->sr_sense_buffer[2] == NOT_READY) {
+ unsigned long time1;
+ if (!spintime) {
+ printk("%s: Spinning up disk...", nbuff);
+ cmd[0] = START_STOP;
+ cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+ ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+ cmd[1] |= 1; /* Return immediately */
+ memset((void *) &cmd[2], 0, 8);
+ cmd[4] = 1; /* Start spin cycle */
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_sense_buffer[0] = 0;
+ SRpnt->sr_sense_buffer[2] = 0;
+
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+ scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
+ 0/*512*/, SD_TIMEOUT, MAX_RETRIES);
+ spintime_value = jiffies;
+ }
+ spintime = 1;
+ time1 = HZ;
+ /* Wait 1 second for next try */
+ do {
+ current->state = TASK_UNINTERRUPTIBLE;
+ time1 = schedule_timeout(time1);
+ } while(time1);
+ printk(".");
+ }
+ } while (the_result && spintime &&
+ time_after(spintime_value + 100 * HZ, jiffies));
+ if (spintime) {
+ if (the_result)
+ printk("not responding...\n");
+ else
+ printk("ready\n");
+ }
+ retries = 3;
+ do {
+ cmd[0] = READ_CAPACITY;
+ cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+ ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+ memset((void *) &cmd[2], 0, 8);
+ memset((void *) buffer, 0, 8);
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_sense_buffer[0] = 0;
+ SRpnt->sr_sense_buffer[2] = 0;
+
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+ scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
+ 8, SD_TIMEOUT, MAX_RETRIES);
+
+ the_result = SRpnt->sr_result;
+ retries--;
+
+ } while (the_result && retries);
+
+ /*
+ * The SCSI standard says:
+ * "READ CAPACITY is necessary for self configuring software"
+ * While not mandatory, support of READ CAPACITY is strongly
+ * encouraged.
+ * We used to die if we couldn't successfully do a READ CAPACITY.
+ * But, now we go on about our way. The side effects of this are
+ *
+ * 1. We can't know block size with certainty. I have said
+ * "512 bytes is it" as this is most common.
+ *
+ * 2. Recovery from when someone attempts to read past the
+ * end of the raw device will be slower.
+ */
+
+ if (the_result) {
+ printk("%s : READ CAPACITY failed.\n"
+ "%s : status = %x, message = %02x, host = %d, driver = %02x \n",
+ nbuff, nbuff,
+ status_byte(the_result),
+ msg_byte(the_result),
+ host_byte(the_result),
+ driver_byte(the_result)
+ );
+ if (driver_byte(the_result) & DRIVER_SENSE)
+ print_req_sense("sd", SRpnt);
+ else
+ printk("%s : sense not available. \n", nbuff);
+
+ printk("%s : block size assumed to be 512 bytes, disk size 1GB. \n",
+ nbuff);
+ rscsi_disks[i].capacity = 0x1fffff;
+ sector_size = 512;
+
+ /* Set dirty bit for removable devices if not ready -
+ * sometimes drives will not report this properly. */
+ if (rscsi_disks[i].device->removable &&
+ SRpnt->sr_sense_buffer[2] == NOT_READY)
+ rscsi_disks[i].device->changed = 1;
+
+ } else {
+ /*
+ * FLOPTICAL: if READ CAPACITY succeeds, the drive is assumed to be ready
+ */
+ rscsi_disks[i].ready = 1;
+
+ rscsi_disks[i].capacity = 1 + ((buffer[0] << 24) |
+ (buffer[1] << 16) |
+ (buffer[2] << 8) |
+ buffer[3]);
+
+ sector_size = (buffer[4] << 24) |
+ (buffer[5] << 16) | (buffer[6] << 8) | buffer[7];
+
+ if (sector_size == 0) {
+ sector_size = 512;
+ printk("%s : sector size 0 reported, assuming 512.\n",
+ nbuff);
+ }
+ if (sector_size != 512 &&
+ sector_size != 1024 &&
+ sector_size != 2048 &&
+ sector_size != 4096 &&
+ sector_size != 256) {
+ printk("%s : unsupported sector size %d.\n",
+ nbuff, sector_size);
+ /*
+ * The user might want to re-format the drive with
+ * a supported sectorsize. Once this happens, it
+ * would be relatively trivial to set the thing up.
+ * For this reason, we leave the thing in the table.
+ */
+ rscsi_disks[i].capacity = 0;
+ }
+ if (sector_size > 1024) {
+ int m;
+
+ /*
+ * We must fix the sd_blocksizes and sd_hardsizes
+ * to allow us to read the partition tables.
+ * The disk reading code does not allow for reading
+ * of partial sectors.
+ */
+ for (m = i << 4; m < ((i + 1) << 4); m++) {
+ sd_blocksizes[m] = sector_size;
+ }
+ }
+ {
+ /*
+ * The msdos fs needs to know the hardware sector size,
+ * so I have created this table. See ll_rw_blk.c
+ * Jacques Gelinas (Jacques@solucorp.qc.ca)
+ */
+ int m;
+ int hard_sector = sector_size;
+ int sz = rscsi_disks[i].capacity * (hard_sector/256);
+
+ /* There are 16 minors allocated for each major device */
+ for (m = i << 4; m < ((i + 1) << 4); m++) {
+ sd_hardsizes[m] = hard_sector;
+ }
+
+ printk("SCSI device %s: "
+ "%d %d-byte hdwr sectors (%d MB)\n",
+ nbuff, rscsi_disks[i].capacity,
+ hard_sector, (sz/2 - sz/1250 + 974)/1950);
+ }
+
+ /* Rescale capacity to 512-byte units */
+ if (sector_size == 4096)
+ rscsi_disks[i].capacity <<= 3;
+ if (sector_size == 2048)
+ rscsi_disks[i].capacity <<= 2;
+ if (sector_size == 1024)
+ rscsi_disks[i].capacity <<= 1;
+ if (sector_size == 256)
+ rscsi_disks[i].capacity >>= 1;
+ }
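+
+ /*
+ * Editor's worked example: a drive whose READ CAPACITY returns a
+ * last-block address of 1048575 with a 2048-byte sector size gets
+ * capacity = 1 + 1048575 = 1048576 blocks above, rescaled here to
+ * 1048576 << 2 = 4194304 512-byte units (2 GB).
+ */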
+
+
+ /*
+ * Unless otherwise specified, this is not write protected.
+ */
+ rscsi_disks[i].write_prot = 0;
+ if (rscsi_disks[i].device->removable && rscsi_disks[i].ready) {
+ /* FLOPTICAL */
+
+ /*
+ * For removable scsi disks (FLOPTICAL) we have to recognise
+ * the Write Protect Flag. This flag is kept in the Scsi_Disk
+ * struct and tested at open!
+ * Daniel Roche ( dan@lectra.fr )
+ *
+ * Changed to get all pages (0x3f) rather than page 1 to
+ * get around devices which do not have a page 1. Since
+ * we're only interested in the header anyway, this should
+ * be fine.
+ * -- Matthew Dharm (mdharm-scsi@one-eyed-alien.net)
+ */
+
+ memset((void *) &cmd[0], 0, 8);
+ cmd[0] = MODE_SENSE;
+ cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+ ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+ cmd[2] = 0x3f; /* Get all pages */
+ cmd[4] = 255; /* Ask for 255 bytes, even though we want just the first 8 */
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_sense_buffer[0] = 0;
+ SRpnt->sr_sense_buffer[2] = 0;
+
+ /* same code as READ CAPACITY!! */
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+ scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
+ 512, SD_TIMEOUT, MAX_RETRIES);
+
+ the_result = SRpnt->sr_result;
+
+ if (the_result) {
+ printk("%s: test WP failed, assume Write Enabled\n", nbuff);
+ } else {
+ rscsi_disks[i].write_prot = ((buffer[2] & 0x80) != 0);
+ printk("%s: Write Protect is %s\n", nbuff,
+ rscsi_disks[i].write_prot ? "on" : "off");
+ }
+
+ } /* check for write protect */
+ SRpnt->sr_device->ten = 1;
+ SRpnt->sr_device->remap = 1;
+ SRpnt->sr_device->sector_size = sector_size;
+ /* Wake up a process waiting for device */
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+ scsi_free(buffer, 512);
+ return i;
+}
+
+/*
+ * The sd_init() function looks at all SCSI drives present, determines
+ * their size, and reads partition table entries for them.
+ */
+
+static int sd_registered;
+
+static int sd_init()
+{
+ int i;
+
+ if (sd_template.dev_noticed == 0)
+ return 0;
+
+ if (!rscsi_disks)
+ sd_template.dev_max = sd_template.dev_noticed + SD_EXTRA_DEVS;
+
+ if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR)
+ sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR;
+
+ if (!sd_registered) {
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+#ifdef DEVFS_MUST_DIE
+ if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) {
+ printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i));
+ sd_template.dev_noticed = 0;
+ return 1;
+ }
+#endif
+ }
+ sd_registered++;
+ }
+ /* We do not support attaching loadable devices yet. */
+ if (rscsi_disks)
+ return 0;
+
+ rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC);
+ if (!rscsi_disks)
+ goto cleanup_devfs;
+ memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk));
+
+ /* for every (necessary) major: */
+ sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC);
+ if (!sd_sizes)
+ goto cleanup_disks;
+ memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int));
+
+ sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int),
+ GFP_ATOMIC);
+ if (!sd_blocksizes)
+ goto cleanup_sizes;
+
+ sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int),
+ GFP_ATOMIC);
+ if (!sd_hardsizes)
+ goto cleanup_blocksizes;
+
+ sd_max_sectors = kmalloc((sd_template.dev_max << 4) * sizeof(int),
+ GFP_ATOMIC);
+ if (!sd_max_sectors)
+ goto cleanup_max_sectors;
+
+ for (i = 0; i < sd_template.dev_max << 4; i++) {
+ sd_blocksizes[i] = 1024;
+ sd_hardsizes[i] = 512;
+ /*
+ * Allow low-level device drivers to generate large (512 KB) scsi
+ * commands if they know what they're doing and ask for it
+ * explicitly via the SHpnt->max_sectors API.
+ */
+ sd_max_sectors[i] = MAX_SEGMENTS*8;
+ }
+
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ blksize_size[SD_MAJOR(i)] = sd_blocksizes +
+ i * (SCSI_DISKS_PER_MAJOR << 4);
+ hardsect_size[SD_MAJOR(i)] = sd_hardsizes +
+ i * (SCSI_DISKS_PER_MAJOR << 4);
+ max_sectors[SD_MAJOR(i)] = sd_max_sectors +
+ i * (SCSI_DISKS_PER_MAJOR << 4);
+ }
+
+ sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk),
+ GFP_ATOMIC);
+ if (!sd_gendisks)
+ goto cleanup_sd_gendisks;
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ sd_gendisks[i] = sd_gendisk; /* memcpy */
+#ifdef DEVFS_MUST_DIE
+ sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR *
+ sizeof *sd_gendisks[i].de_arr,
+ GFP_ATOMIC);
+ if (!sd_gendisks[i].de_arr)
+ goto cleanup_gendisks_de_arr;
+ memset (sd_gendisks[i].de_arr, 0,
+ SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr);
+#endif
+ sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR *
+ sizeof *sd_gendisks[i].flags,
+ GFP_ATOMIC);
+ if (!sd_gendisks[i].flags)
+ goto cleanup_gendisks_flags;
+ memset (sd_gendisks[i].flags, 0,
+ SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags);
+ sd_gendisks[i].major = SD_MAJOR(i);
+ sd_gendisks[i].major_name = "sd";
+ sd_gendisks[i].minor_shift = 4;
+ sd_gendisks[i].max_p = 1 << 4;
+ sd_gendisks[i].part = kmalloc((SCSI_DISKS_PER_MAJOR << 4) *
+ sizeof(struct hd_struct),
+ GFP_ATOMIC);
+ if (!sd_gendisks[i].part)
+ goto cleanup_gendisks_part;
+ memset(sd_gendisks[i].part, 0, (SCSI_DISKS_PER_MAJOR << 4) *
+ sizeof(struct hd_struct));
+ sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4);
+ sd_gendisks[i].nr_real = 0;
+ sd_gendisks[i].real_devices =
+ (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR);
+ }
+
+ return 0;
+
+ cleanup_gendisks_part:
+ kfree(sd_gendisks[i].flags);
+ cleanup_gendisks_flags:
+#ifdef DEVFS_MUST_DIE
+ kfree(sd_gendisks[i].de_arr);
+ cleanup_gendisks_de_arr:
+#endif
+ while (--i >= 0) {
+#ifdef DEVFS_MUST_DIE
+ kfree(sd_gendisks[i].de_arr);
+#endif
+ kfree(sd_gendisks[i].flags);
+ kfree(sd_gendisks[i].part);
+ }
+ kfree(sd_gendisks);
+ sd_gendisks = NULL;
+ cleanup_sd_gendisks:
+ kfree(sd_max_sectors);
+ cleanup_max_sectors:
+ kfree(sd_hardsizes);
+ cleanup_blocksizes:
+ kfree(sd_blocksizes);
+ cleanup_sizes:
+ kfree(sd_sizes);
+ cleanup_disks:
+ kfree(rscsi_disks);
+ rscsi_disks = NULL;
+ cleanup_devfs:
+#ifdef DEVFS_MUST_DIE
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ devfs_unregister_blkdev(SD_MAJOR(i), "sd");
+ }
+#endif
+ sd_registered--;
+ sd_template.dev_noticed = 0;
+ return 1;
+}
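
The unwinding at the end of sd_init() is the standard staged-cleanup idiom: each allocation that fails jumps to a label that frees only what was allocated before it, and the labels fall through one another in reverse order of allocation. A minimal sketch of the same pattern, with hypothetical names that are not part of this driver:

    static int alloc_three(void)
    {
        void *a, *b, *c;

        a = kmalloc(128, GFP_ATOMIC);
        if (!a)
            goto fail_a;                 /* nothing allocated yet */
        b = kmalloc(128, GFP_ATOMIC);
        if (!b)
            goto fail_b;                 /* free 'a' only */
        c = kmalloc(128, GFP_ATOMIC);
        if (!c)
            goto fail_c;                 /* free 'b', then fall through */
        /* real code would store a, b, c somewhere before returning */
        return 0;

    fail_c:
        kfree(b);
    fail_b:
        kfree(a);
    fail_a:
        return 1;
    }
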
+
+
+static void sd_finish()
+{
+ int i;
+
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ blk_dev[SD_MAJOR(i)].queue = sd_find_queue;
+ add_gendisk(&sd_gendisks[i]);
+ }
+
+ for (i = 0; i < sd_template.dev_max; ++i)
+ if (!rscsi_disks[i].capacity && rscsi_disks[i].device) {
+ sd_init_onedisk(i);
+ if (!rscsi_disks[i].has_part_table) {
+ sd_sizes[i << 4] = rscsi_disks[i].capacity;
+ register_disk(&SD_GENDISK(i), MKDEV_SD(i),
+ 1<<4, &sd_fops,
+ rscsi_disks[i].capacity);
+ rscsi_disks[i].has_part_table = 1;
+ }
+ }
+#if 0
+ /* If our host adapter is capable of scatter-gather, then we increase
+ * the read-ahead to 60 blocks (120 sectors). If not, we use
+ * a two block (4 sector) read ahead. We can only respect this with the
+ * granularity of every 16 disks (one device major).
+ */
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ read_ahead[SD_MAJOR(i)] =
+ (rscsi_disks[i * SCSI_DISKS_PER_MAJOR].device
+ && rscsi_disks[i * SCSI_DISKS_PER_MAJOR].device->host->sg_tablesize)
+ ? 120 /* 120 sector read-ahead */
+ : 4; /* 4 sector read-ahead */
+ }
+#endif
+
+ return;
+}
+
+static int sd_detect(Scsi_Device * SDp)
+{
+ if (SDp->type != TYPE_DISK && SDp->type != TYPE_MOD)
+ return 0;
+ sd_template.dev_noticed++;
+ return 1;
+}
+
+static int sd_attach(Scsi_Device * SDp)
+{
+ unsigned int devnum;
+ Scsi_Disk *dpnt;
+ int i;
+ char nbuff[6];
+
+ if (SDp->type != TYPE_DISK && SDp->type != TYPE_MOD)
+ return 0;
+
+ if (sd_template.nr_dev >= sd_template.dev_max || rscsi_disks == NULL) {
+ SDp->attached--;
+ return 1;
+ }
+ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++)
+ if (!dpnt->device)
+ break;
+
+ if (i >= sd_template.dev_max) {
+ printk(KERN_WARNING "scsi_devices corrupt (sd),"
+ " nr_dev %d dev_max %d\n",
+ sd_template.nr_dev, sd_template.dev_max);
+ SDp->attached--;
+ return 1;
+ }
+
+ rscsi_disks[i].device = SDp;
+ rscsi_disks[i].has_part_table = 0;
+ sd_template.nr_dev++;
+ SD_GENDISK(i).nr_real++;
+ devnum = i % SCSI_DISKS_PER_MAJOR;
+#ifdef DEVFS_MUST_DIE
+ SD_GENDISK(i).de_arr[devnum] = SDp->de;
+#endif
+ if (SDp->removable)
+ SD_GENDISK(i).flags[devnum] |= GENHD_FL_REMOVABLE;
+ sd_devname(i, nbuff);
+ printk("Attached scsi %sdisk %s at scsi%d, channel %d, id %d, lun %d\n",
+ SDp->removable ? "removable " : "",
+ nbuff, SDp->host->host_no, SDp->channel, SDp->id, SDp->lun);
+ return 0;
+}
+
+#define DEVICE_BUSY rscsi_disks[target].device->busy
+#define ALLOW_REVALIDATE rscsi_disks[target].device->allow_revalidate
+#define USAGE rscsi_disks[target].device->access_count
+#define CAPACITY rscsi_disks[target].capacity
+#define MAYBE_REINIT sd_init_onedisk(target)
+
+/* This routine is called to flush all partitions and partition tables
+ * for a changed scsi disk, and then re-read the new partition table.
+ * If we are revalidating a disk because of a media change, then we
+ * enter with usage == 0. If we are using an ioctl, we automatically have
+ * usage == 1 (we need an open channel to use an ioctl :-), so this
+ * is our limit.
+ */
+int revalidate_scsidisk(kdev_t dev, int maxusage)
+{
+ struct gendisk *sdgd;
+ int target;
+ int max_p;
+ int start;
+ int i;
+
+ target = DEVICE_NR(dev);
+
+ if (DEVICE_BUSY || (ALLOW_REVALIDATE == 0 && USAGE > maxusage)) {
+ printk("Device busy for revalidation (usage=%d)\n", USAGE);
+ return -EBUSY;
+ }
+ DEVICE_BUSY = 1;
+
+ sdgd = &SD_GENDISK(target);
+ max_p = sd_gendisk.max_p;
+ start = target << sd_gendisk.minor_shift;
+
+ for (i = max_p - 1; i >= 0; i--) {
+ int index = start + i;
+ invalidate_device(MKDEV_SD_PARTITION(index), 1);
+ sdgd->part[SD_MINOR_NUMBER(index)].start_sect = 0;
+ sdgd->part[SD_MINOR_NUMBER(index)].nr_sects = 0;
+ /*
+ * Reset the blocksize for everything so that we can read
+ * the partition table. Technically we will determine the
+ * correct block size when we revalidate, but we do this just
+ * to make sure that everything remains consistent.
+ */
+ if (rscsi_disks[target].device->sector_size == 2048)
+ sd_blocksizes[index] = 2048;
+ else
+ sd_blocksizes[index] = 1024;
+ }
+
+#ifdef MAYBE_REINIT
+ MAYBE_REINIT;
+#endif
+
+ grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR,
+ 1<<4, CAPACITY);
+
+ DEVICE_BUSY = 0;
+ return 0;
+}
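
As the comment above explains, maxusage encodes who is calling: a media-change revalidation must find the device completely unused, while an ioctl caller necessarily holds one open itself. A hedged sketch of how the two call sites differ (illustrative only, not the driver's actual dispatch code):

    /* Illustrative only: the two legal values of 'maxusage'. */
    static int example_revalidate(kdev_t dev, int from_ioctl)
    {
        /* An ioctl path owns one open of its own, so usage == 1 is
         * acceptable; a media-change path must see no users at all. */
        return revalidate_scsidisk(dev, from_ioctl ? 1 : 0);
    }
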
+
+static int fop_revalidate_scsidisk(kdev_t dev)
+{
+ return revalidate_scsidisk(dev, 0);
+}
+
+static void sd_detach(Scsi_Device * SDp)
+{
+ Scsi_Disk *dpnt;
+ struct gendisk *sdgd;
+ int i, j;
+ int max_p;
+ int start;
+
+ if (rscsi_disks == NULL)
+ return;
+
+ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++)
+ if (dpnt->device == SDp) {
+
+ /* If we are disconnecting a disk driver, sync and invalidate
+ * everything */
+ sdgd = &SD_GENDISK(i);
+ max_p = sd_gendisk.max_p;
+ start = i << sd_gendisk.minor_shift;
+
+ for (j = max_p - 1; j >= 0; j--) {
+ int index = start + j;
+ invalidate_device(MKDEV_SD_PARTITION(index), 1);
+ sdgd->part[SD_MINOR_NUMBER(index)].start_sect = 0;
+ sdgd->part[SD_MINOR_NUMBER(index)].nr_sects = 0;
+ sd_sizes[index] = 0;
+ }
+#ifdef DEVFS_MUST_DIE
+ devfs_register_partitions (sdgd,
+ SD_MINOR_NUMBER (start), 1);
+#endif
+ /* unregister_disk() */
+ dpnt->has_part_table = 0;
+ dpnt->device = NULL;
+ dpnt->capacity = 0;
+ SDp->attached--;
+ sd_template.dev_noticed--;
+ sd_template.nr_dev--;
+ SD_GENDISK(i).nr_real--;
+ return;
+ }
+ return;
+}
+
+static int __init init_sd(void)
+{
+ sd_template.module = THIS_MODULE;
+ return scsi_register_module(MODULE_SCSI_DEV, &sd_template);
+}
+
+static void __exit exit_sd(void)
+{
+ int i;
+
+#if 0
+ scsi_unregister_module(MODULE_SCSI_DEV, &sd_template);
+#endif
+
+#ifdef DEVFS_MUST_DIE
+ for (i = 0; i < N_USED_SD_MAJORS; i++)
+ devfs_unregister_blkdev(SD_MAJOR(i), "sd");
+#endif
+
+ sd_registered--;
+ if (rscsi_disks != NULL) {
+ kfree(rscsi_disks);
+ kfree(sd_sizes);
+ kfree(sd_blocksizes);
+ kfree(sd_hardsizes);
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+#if 0 /* XXX aren't we forgetting to deallocate something? */
+ kfree(sd_gendisks[i].de_arr);
+ kfree(sd_gendisks[i].flags);
+#endif
+ kfree(sd_gendisks[i].part);
+ }
+ }
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ del_gendisk(&sd_gendisks[i]);
+ blk_size[SD_MAJOR(i)] = NULL; /* XXX blksize_size actually? */
+ hardsect_size[SD_MAJOR(i)] = NULL;
+#if 0
+ read_ahead[SD_MAJOR(i)] = 0;
+#endif
+ }
+ sd_template.dev_max = 0;
+ if (sd_gendisks != NULL) /* kfree tests for 0, but leave explicit */
+ kfree(sd_gendisks);
+}
+
+module_init(init_sd);
+module_exit(exit_sd);
+MODULE_LICENSE("GPL");
diff --git a/xen/drivers/scsi/sd.h b/xen/drivers/scsi/sd.h
new file mode 100644
index 0000000000..8e29445839
--- /dev/null
+++ b/xen/drivers/scsi/sd.h
@@ -0,0 +1,66 @@
+/*
+ * sd.h Copyright (C) 1992 Drew Eckhardt
+ * SCSI disk driver header file by
+ * Drew Eckhardt
+ *
+ * <drew@colorado.edu>
+ *
+ * Modified by Eric Youngdale eric@andante.org to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ */
+#ifndef _SD_H
+#define _SD_H
+/*
+ $Header: /usr/src/linux/kernel/blk_drv/scsi/RCS/sd.h,v 1.1 1992/07/24 06:27:38 root Exp root $
+ */
+
+#ifndef _SCSI_H
+#include "scsi.h"
+#endif
+
+#ifndef _GENDISK_H
+#include <xeno/genhd.h>
+#endif
+
+typedef struct scsi_disk {
+ unsigned capacity; /* size in blocks */
+ Scsi_Device *device;
+ unsigned char ready; /* flag ready for FLOPTICAL */
+ unsigned char write_prot; /* flag write_protect for rmvable dev */
+ unsigned char sector_bit_size; /* sector_size == 2^sector_bit_size */
+ unsigned char sector_bit_shift; /* power of 2 sectors per FS block */
+ unsigned has_part_table:1; /* has partition table */
+} Scsi_Disk;
+
+extern int revalidate_scsidisk(kdev_t dev, int maxusage);
+
+/*
+ * Used by pmac to find the device associated with a target.
+ */
+extern kdev_t sd_find_target(void *host, int tgt);
+
+#define N_SD_MAJORS 8
+
+#define SD_MAJOR_MASK (N_SD_MAJORS - 1)
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/include/asm-i386/apic.h b/xen/include/asm-i386/apic.h
new file mode 100644
index 0000000000..574cc23203
--- /dev/null
+++ b/xen/include/asm-i386/apic.h
@@ -0,0 +1,96 @@
+#ifndef __ASM_APIC_H
+#define __ASM_APIC_H
+
+//#include <linux/config.h>
+//#include <linux/pm.h>
+#include <asm/apicdef.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#define APIC_DEBUG 0
+
+#if APIC_DEBUG
+#define Dprintk(x...) printk(x)
+#else
+#define Dprintk(x...)
+#endif
+
+/*
+ * Basic functions accessing APICs.
+ */
+
+static __inline void apic_write(unsigned long reg, unsigned long v)
+{
+ *((volatile unsigned long *)(APIC_BASE+reg)) = v;
+}
+
+static __inline void apic_write_atomic(unsigned long reg, unsigned long v)
+{
+ xchg((volatile unsigned long *)(APIC_BASE+reg), v);
+}
+
+static __inline unsigned long apic_read(unsigned long reg)
+{
+ return *((volatile unsigned long *)(APIC_BASE+reg));
+}
+
+static __inline__ void apic_wait_icr_idle(void)
+{
+ do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
+}
+
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_READ_AROUND_WRITE 0
+# define apic_read_around(x)
+# define apic_write_around(x,y) apic_write((x),(y))
+#else
+# define FORCE_READ_AROUND_WRITE 1
+# define apic_read_around(x) apic_read(x)
+# define apic_write_around(x,y) apic_write_atomic((x),(y))
+#endif
+
+static inline void ack_APIC_irq(void)
+{
+ /*
+ * ack_APIC_irq() actually gets compiled as a single instruction:
+ * - a single rmw on Pentium/82489DX
+ * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+ * ... yummie.
+ */
+
+ /* Docs say use 0 for future compatibility */
+ apic_write_around(APIC_EOI, 0);
+}
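
These accessors treat the local APIC as a block of memory-mapped 32-bit registers at APIC_BASE, with the register offsets and field-extraction macros supplied by <asm/apicdef.h> (added later in this patch). As a sketch of how they compose, reading this CPU's physical APIC ID looks like:

    /* Sketch: fetch this CPU's physical APIC ID (APIC already mapped). */
    static unsigned int example_apic_id(void)
    {
        return GET_APIC_ID(apic_read(APIC_ID));
    }
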
+
+extern int get_maxlvt(void);
+extern void connect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (void);
+extern void disable_local_APIC (void);
+extern int verify_local_APIC (void);
+extern void sync_Arb_IDs (void);
+extern void init_bsp_APIC (void);
+extern void setup_local_APIC (void);
+extern void init_apic_mappings (void);
+extern void setup_APIC_clocks (void);
+extern void setup_apic_nmi_watchdog (void);
+extern inline void nmi_watchdog_tick (struct pt_regs * regs);
+extern int APIC_init_uniprocessor (void);
+extern void disable_APIC_timer(void);
+extern void enable_APIC_timer(void);
+
+//extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);
+//extern void apic_pm_unregister(struct pm_dev*);
+
+extern unsigned int apic_timer_irqs [NR_CPUS];
+extern int check_nmi_watchdog (void);
+
+extern unsigned int nmi_watchdog;
+#define NMI_NONE 0
+#define NMI_IO_APIC 1
+#define NMI_LOCAL_APIC 2
+#define NMI_INVALID 3
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#endif /* __ASM_APIC_H */
diff --git a/xen/include/asm-i386/apicdef.h b/xen/include/asm-i386/apicdef.h
new file mode 100644
index 0000000000..227bfca652
--- /dev/null
+++ b/xen/include/asm-i386/apicdef.h
@@ -0,0 +1,378 @@
+#ifndef __ASM_APICDEF_H
+#define __ASM_APICDEF_H
+
+/*
+ * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
+ *
+ * Alan Cox <Alan.Cox@linux.org>, 1995.
+ * Ingo Molnar <mingo@redhat.com>, 1999, 2000
+ */
+
+#define APIC_DEFAULT_PHYS_BASE 0xfee00000
+
+#define APIC_ID 0x20
+#define APIC_ID_MASK (0x0F<<24)
+#define GET_APIC_ID(x) (((x)>>24)&0x0F)
+#define APIC_LVR 0x30
+#define APIC_LVR_MASK 0xFF00FF
+#define GET_APIC_VERSION(x) ((x)&0xFF)
+#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF)
+#define APIC_INTEGRATED(x) ((x)&0xF0)
+#define APIC_TASKPRI 0x80
+#define APIC_TPRI_MASK 0xFF
+#define APIC_ARBPRI 0x90
+#define APIC_ARBPRI_MASK 0xFF
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */
+#define APIC_RRR 0xC0
+#define APIC_LDR 0xD0
+#define APIC_LDR_MASK (0xFF<<24)
+#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF)
+#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
+#define APIC_ALL_CPUS 0xFF
+#define APIC_DFR 0xE0
+#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */
+#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1<<9)
+#define APIC_SPIV_APIC_ENABLED (1<<8)
+#define APIC_ISR 0x100
+#define APIC_TMR 0x180
+#define APIC_IRR 0x200
+#define APIC_ESR 0x280
+#define APIC_ESR_SEND_CS 0x00001
+#define APIC_ESR_RECV_CS 0x00002
+#define APIC_ESR_SEND_ACC 0x00004
+#define APIC_ESR_RECV_ACC 0x00008
+#define APIC_ESR_SENDILL 0x00020
+#define APIC_ESR_RECVILL 0x00040
+#define APIC_ESR_ILLREGA 0x00080
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF)
+#define SET_APIC_DEST_FIELD(x) ((x)<<24)
+#define APIC_LVTT 0x320
+#define APIC_LVTPC 0x340
+#define APIC_LVT0 0x350
+#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
+#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
+#define SET_APIC_TIMER_BASE(x) (((x)<<18))
+#define APIC_TIMER_BASE_CLKIN 0x0
+#define APIC_TIMER_BASE_TMBASE 0x1
+#define APIC_TIMER_BASE_DIV 0x2
+#define APIC_LVT_TIMER_PERIODIC (1<<17)
+#define APIC_LVT_MASKED (1<<16)
+#define APIC_LVT_LEVEL_TRIGGER (1<<15)
+#define APIC_LVT_REMOTE_IRR (1<<14)
+#define APIC_INPUT_POLARITY (1<<13)
+#define APIC_SEND_PENDING (1<<12)
+#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
+#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8))
+#define APIC_MODE_FIXED 0x0
+#define APIC_MODE_NMI 0x4
+#define APIC_MODE_EXINT 0x7
+#define APIC_LVT1 0x360
+#define APIC_LVTERR 0x370
+#define APIC_TMICT 0x380
+#define APIC_TMCCT 0x390
+#define APIC_TDCR 0x3E0
+#define APIC_TDR_DIV_TMBASE (1<<2)
+#define APIC_TDR_DIV_1 0xB
+#define APIC_TDR_DIV_2 0x0
+#define APIC_TDR_DIV_4 0x1
+#define APIC_TDR_DIV_8 0x2
+#define APIC_TDR_DIV_16 0x3
+#define APIC_TDR_DIV_32 0x8
+#define APIC_TDR_DIV_64 0x9
+#define APIC_TDR_DIV_128 0xA
+
+#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+#define MAX_IO_APICS 32
+#else
+#define MAX_IO_APICS 8
+#endif
+
+
+/*
+ * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs
+ * don't broadcast (yet?), but if they did, they might use 0xFFFF.
+ */
+#define APIC_BROADCAST_ID_XAPIC (0xFF)
+#define APIC_BROADCAST_ID_APIC (0x0F)
+
+/*
+ * The local APIC register structure, memory mapped. Not terribly well
+ * tested, but we might eventually use this one in the future - the
+ * reason we cannot use it right now is the P5 APIC: it has an
+ * erratum whereby it cannot take 8-bit reads and writes, only 32-bit ones ...
+ */
+#define u32 unsigned int
+
+#define lapic ((volatile struct local_apic *)APIC_BASE)
+
+struct local_apic {
+
+/*000*/ struct { u32 __reserved[4]; } __reserved_01;
+
+/*010*/ struct { u32 __reserved[4]; } __reserved_02;
+
+/*020*/ struct { /* APIC ID Register */
+ u32 __reserved_1 : 24,
+ phys_apic_id : 4,
+ __reserved_2 : 4;
+ u32 __reserved[3];
+ } id;
+
+/*030*/ const
+ struct { /* APIC Version Register */
+ u32 version : 8,
+ __reserved_1 : 8,
+ max_lvt : 8,
+ __reserved_2 : 8;
+ u32 __reserved[3];
+ } version;
+
+/*040*/ struct { u32 __reserved[4]; } __reserved_03;
+
+/*050*/ struct { u32 __reserved[4]; } __reserved_04;
+
+/*060*/ struct { u32 __reserved[4]; } __reserved_05;
+
+/*070*/ struct { u32 __reserved[4]; } __reserved_06;
+
+/*080*/ struct { /* Task Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } tpr;
+
+/*090*/ const
+ struct { /* Arbitration Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } apr;
+
+/*0A0*/ const
+ struct { /* Processor Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } ppr;
+
+/*0B0*/ struct { /* End Of Interrupt Register */
+ u32 eoi;
+ u32 __reserved[3];
+ } eoi;
+
+/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
+
+/*0D0*/ struct { /* Logical Destination Register */
+ u32 __reserved_1 : 24,
+ logical_dest : 8;
+ u32 __reserved_2[3];
+ } ldr;
+
+/*0E0*/ struct { /* Destination Format Register */
+ u32 __reserved_1 : 28,
+ model : 4;
+ u32 __reserved_2[3];
+ } dfr;
+
+/*0F0*/ struct { /* Spurious Interrupt Vector Register */
+ u32 spurious_vector : 8,
+ apic_enabled : 1,
+ focus_cpu : 1,
+ __reserved_2 : 22;
+ u32 __reserved_3[3];
+ } svr;
+
+/*100*/ struct { /* In Service Register */
+/*170*/ u32 bitfield;
+ u32 __reserved[3];
+ } isr [8];
+
+/*180*/ struct { /* Trigger Mode Register */
+/*1F0*/ u32 bitfield;
+ u32 __reserved[3];
+ } tmr [8];
+
+/*200*/ struct { /* Interrupt Request Register */
+/*270*/ u32 bitfield;
+ u32 __reserved[3];
+ } irr [8];
+
+/*280*/ union { /* Error Status Register */
+ struct {
+ u32 send_cs_error : 1,
+ receive_cs_error : 1,
+ send_accept_error : 1,
+ receive_accept_error : 1,
+ __reserved_1 : 1,
+ send_illegal_vector : 1,
+ receive_illegal_vector : 1,
+ illegal_register_address : 1,
+ __reserved_2 : 24;
+ u32 __reserved_3[3];
+ } error_bits;
+ struct {
+ u32 errors;
+ u32 __reserved_3[3];
+ } all_errors;
+ } esr;
+
+/*290*/ struct { u32 __reserved[4]; } __reserved_08;
+
+/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
+
+/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
+
+/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
+
+/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
+
+/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
+
+/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
+
+/*300*/ struct { /* Interrupt Command Register 1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ destination_mode : 1,
+ delivery_status : 1,
+ __reserved_1 : 1,
+ level : 1,
+ trigger : 1,
+ __reserved_2 : 2,
+ shorthand : 2,
+ __reserved_3 : 12;
+ u32 __reserved_4[3];
+ } icr1;
+
+/*310*/ struct { /* Interrupt Command Register 2 */
+ union {
+ u32 __reserved_1 : 24,
+ phys_dest : 4,
+ __reserved_2 : 4;
+ u32 __reserved_3 : 24,
+ logical_dest : 8;
+ } dest;
+ u32 __reserved_4[3];
+ } icr2;
+
+/*320*/ struct { /* LVT - Timer */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ timer_mode : 1,
+ __reserved_3 : 14;
+ u32 __reserved_4[3];
+ } lvt_timer;
+
+/*330*/ struct { u32 __reserved[4]; } __reserved_15;
+
+/*340*/ struct { /* LVT - Performance Counter */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_pc;
+
+/*350*/ struct { /* LVT - LINT0 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint0;
+
+/*360*/ struct { /* LVT - LINT1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint1;
+
+/*370*/ struct { /* LVT - Error */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_error;
+
+/*380*/ struct { /* Timer Initial Count Register */
+ u32 initial_count;
+ u32 __reserved_2[3];
+ } timer_icr;
+
+/*390*/ const
+ struct { /* Timer Current Count Register */
+ u32 curr_count;
+ u32 __reserved_2[3];
+ } timer_ccr;
+
+/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
+
+/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
+
+/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
+
+/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
+
+/*3E0*/ struct { /* Timer Divide Configuration Register */
+ u32 divisor : 4,
+ __reserved_1 : 28;
+ u32 __reserved_2[3];
+ } timer_dcr;
+
+/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
+
+} __attribute__ ((packed));
+
+#undef u32
+
+#endif
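
The lapic structure above is the bitfield view of the same registers that the flat offset macros index; as the header's own comment warns, it is not well tested, and every access must remain 32 bits wide because of the P5 erratum. A hedged sketch of the equivalent read through this view:

    /* Sketch: same datum as GET_APIC_ID(apic_read(APIC_ID)), via bitfields. */
    static unsigned int example_lapic_id(void)
    {
        return lapic->id.phys_apic_id;
    }
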
diff --git a/xen/include/asm-i386/atomic.h b/xen/include/asm-i386/atomic.h
new file mode 100644
index 0000000000..70a1212ed6
--- /dev/null
+++ b/xen/include/asm-i386/atomic.h
@@ -0,0 +1,204 @@
+#ifndef __ARCH_I386_ATOMIC__
+#define __ARCH_I386_ATOMIC__
+
+#include <xeno/config.h>
+
+/*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc..
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK "lock ; "
+#else
+#define LOCK ""
+#endif
+
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_read(v) ((v)->counter)
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_set(v,i) (((v)->counter) = (i))
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v. Note that the guaranteed useful range
+ * of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_add(int i, atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "addl %1,%0"
+ :"=m" (v->counter)
+ :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub - subtract the atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_sub(int i, atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "subl %1,%0"
+ :"=m" (v->counter)
+ :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "subl %2,%0; sete %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"ir" (i), "m" (v->counter) : "memory");
+ return c;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_inc(atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "incl %0"
+ :"=m" (v->counter)
+ :"m" (v->counter));
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_dec(atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "decl %0"
+ :"=m" (v->counter)
+ :"m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_dec_and_test(atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "decl %0; sete %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"m" (v->counter) : "memory");
+ return c != 0;
+}
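
atomic_inc() and atomic_dec_and_test() pair into the reference-counting idiom that this header's opening comment alludes to; only the caller that drops the count to zero performs the teardown. A sketch with a hypothetical object type, not something from this tree:

    struct obj {
        atomic_t refcnt;
        /* ... payload ... */
    };

    static void obj_get(struct obj *o)
    {
        atomic_inc(&o->refcnt);
    }

    static void obj_put(struct obj *o)
    {
        /* The decrement-and-test carries a memory clobber, so prior
         * writes to the object are ordered before the free. */
        if (atomic_dec_and_test(&o->refcnt))
            kfree(o);
    }
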
+
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_inc_and_test(atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "incl %0; sete %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"m" (v->counter) : "memory");
+ return c != 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_add_negative(int i, atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "addl %2,%0; sets %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"ir" (i), "m" (v->counter) : "memory");
+ return c;
+}
+
+/* These are x86-specific, used by some header files */
+#define atomic_clear_mask(mask, addr) \
+__asm__ __volatile__(LOCK "andl %0,%1" \
+: : "r" (~(mask)),"m" (*addr) : "memory")
+
+#define atomic_set_mask(mask, addr) \
+__asm__ __volatile__(LOCK "orl %0,%1" \
+: : "r" (mask),"m" (*addr) : "memory")
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec() barrier()
+#define smp_mb__after_atomic_dec() barrier()
+#define smp_mb__before_atomic_inc() barrier()
+#define smp_mb__after_atomic_inc() barrier()
+
+#endif
diff --git a/xen/include/asm-i386/bitops.h b/xen/include/asm-i386/bitops.h
new file mode 100644
index 0000000000..73bcd8ef5f
--- /dev/null
+++ b/xen/include/asm-i386/bitops.h
@@ -0,0 +1,368 @@
+#ifndef _I386_BITOPS_H
+#define _I386_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+#include <xeno/config.h>
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX "lock ; "
+#else
+#define LOCK_PREFIX ""
+#endif
+
+#define ADDR (*(volatile long *) addr)
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered. See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btsl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __set_bit(int nr, volatile void * addr)
+{
+ __asm__(
+ "btsl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btrl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+#define smp_mb__before_clear_bit() barrier()
+#define smp_mb__after_clear_bit() barrier()
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __change_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__(
+ "btcl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void change_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btcl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_set_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btsl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_set_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__(
+ "btsl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr));
+ return oldbit;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btrl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_clear_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__(
+ "btrl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr));
+ return oldbit;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__(
+ "btcl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its new value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_change_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btcl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+
+static __inline__ int constant_test_bit(int nr, const volatile void * addr)
+{
+ return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int variable_test_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__(
+ "btl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit)
+ :"m" (ADDR),"Ir" (nr));
+ return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
+ */
+static __inline__ int find_first_zero_bit(void * addr, unsigned size)
+{
+ int d0, d1, d2;
+ int res;
+
+ if (!size)
+ return 0;
+ /* This looks at memory. Mark it volatile to tell gcc not to move it around */
+ __asm__ __volatile__(
+ "movl $-1,%%eax\n\t"
+ "xorl %%edx,%%edx\n\t"
+ "repe; scasl\n\t"
+ "je 1f\n\t"
+ "xorl -4(%%edi),%%eax\n\t"
+ "subl $4,%%edi\n\t"
+ "bsfl %%eax,%%edx\n"
+ "1:\tsubl %%ebx,%%edi\n\t"
+ "shll $3,%%edi\n\t"
+ "addl %%edi,%%edx"
+ :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
+ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
+ return res;
+}
+
+/**
+ * find_next_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
+{
+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
+ int set = 0, bit = offset & 31, res;
+
+ if (bit) {
+ /*
+ * Look for zero in the first 32-bit word
+ */
+ __asm__("bsfl %1,%0\n\t"
+ "jne 1f\n\t"
+ "movl $32, %0\n"
+ "1:"
+ : "=r" (set)
+ : "r" (~(*p >> bit)));
+ if (set < (32 - bit))
+ return set + offset;
+ set = 32 - bit;
+ p++;
+ }
+ /*
+ * No zero yet, search the remaining full words for a zero
+ */
+ res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
+ return (offset + set + res);
+}
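
find_first_zero_bit() combines with the atomic bit operations above into a simple ID allocator; the scan itself is not atomic, so the claim must either happen under a lock or, as in this sketch with hypothetical names, be retried with test_and_set_bit():

    #define MAX_IDS 1024

    static unsigned long id_map[MAX_IDS / (8 * sizeof(unsigned long))];

    /* Returns a previously unused ID, or -1 if the map is full. */
    static int alloc_id(void)
    {
        int id;

        do {
            id = find_first_zero_bit(id_map, MAX_IDS);
            if (id >= MAX_IDS)
                return -1;
            /* Retry if another CPU claimed the bit between scan and set. */
        } while (test_and_set_bit(id, id_map));
        return id;
    }
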
+
+/**
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static __inline__ unsigned long ffz(unsigned long word)
+{
+ __asm__("bsfl %1,%0"
+ :"=r" (word)
+ :"r" (~word));
+ return word;
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static __inline__ int ffs(int x)
+{
+ int r;
+
+ __asm__("bsfl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $-1,%0\n"
+ "1:" : "=r" (r) : "g" (x));
+ return r+1;
+}
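
ffs() follows the 1-based libc convention and returns 0 for a zero argument, whereas ffz() above is 0-based and undefined for ~0UL. A small sketch of the differing conventions:

    static void example_bit_search(void)
    {
        int a = ffs(0x8);           /* == 4: bit 3, counted from 1 */
        int b = ffs(0);             /* == 0: no bit set */
        unsigned long c = ffz(0x7); /* == 3: first zero bit, from 0 */
        (void)a; (void)b; (void)c;
    }
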
+
+/**
+ * hweightN - returns the Hamming weight of an N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+#define ext2_set_bit __test_and_set_bit
+#define ext2_clear_bit __test_and_clear_bit
+#define ext2_test_bit test_bit
+#define ext2_find_first_zero_bit find_first_zero_bit
+#define ext2_find_next_zero_bit find_next_zero_bit
+
+/* Bitmap functions for the minix filesystem. */
+#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr)
+#define minix_set_bit(nr,addr) __set_bit(nr,addr)
+#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr)
+#define minix_test_bit(nr,addr) test_bit(nr,addr)
+#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
+
+#endif /* _I386_BITOPS_H */
diff --git a/xen/include/asm-i386/byteorder.h b/xen/include/asm-i386/byteorder.h
new file mode 100644
index 0000000000..bbfb629fae
--- /dev/null
+++ b/xen/include/asm-i386/byteorder.h
@@ -0,0 +1,47 @@
+#ifndef _I386_BYTEORDER_H
+#define _I386_BYTEORDER_H
+
+#include <asm/types.h>
+
+#ifdef __GNUC__
+
+/* For avoiding bswap on i386 */
+#ifdef __KERNEL__
+#include <linux/config.h>
+#endif
+
+static __inline__ __const__ __u32 ___arch__swab32(__u32 x)
+{
+#ifdef CONFIG_X86_BSWAP
+ __asm__("bswap %0" : "=r" (x) : "0" (x));
+#else
+ __asm__("xchgb %b0,%h0\n\t" /* swap lower bytes */
+ "rorl $16,%0\n\t" /* swap words */
+ "xchgb %b0,%h0" /* swap higher bytes */
+ :"=q" (x)
+ : "0" (x));
+#endif
+ return x;
+}
+
+static __inline__ __const__ __u16 ___arch__swab16(__u16 x)
+{
+ __asm__("xchgb %b0,%h0" /* swap bytes */ \
+ : "=q" (x) \
+ : "0" (x)); \
+ return x;
+}
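
These swab routines are the building blocks behind the generic cpu_to_be32()/ntohl() family pulled in by the little_endian.h include below; on their own they simply reverse byte order. A sketch:

    /* Sketch: 0x12345678 becomes 0x78563412 (full byte reversal). */
    static __u32 example_swab32(void)
    {
        return ___arch__swab32(0x12345678);
    }
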
+
+#define __arch__swab32(x) ___arch__swab32(x)
+#define __arch__swab16(x) ___arch__swab16(x)
+
+#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+# define __BYTEORDER_HAS_U64__
+# define __SWAB_64_THRU_32__
+#endif
+
+#endif /* __GNUC__ */
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _I386_BYTEORDER_H */
diff --git a/xen/include/asm-i386/cache.h b/xen/include/asm-i386/cache.h
new file mode 100644
index 0000000000..502c8ba7a6
--- /dev/null
+++ b/xen/include/asm-i386/cache.h
@@ -0,0 +1,13 @@
+/*
+ * include/asm-i386/cache.h
+ */
+#ifndef __ARCH_I386_CACHE_H
+#define __ARCH_I386_CACHE_H
+
+#include <xeno/config.h>
+
+/* L1 cache line size */
+#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#endif
diff --git a/xen/include/asm-i386/cpufeature.h b/xen/include/asm-i386/cpufeature.h
new file mode 100644
index 0000000000..85b8b43974
--- /dev/null
+++ b/xen/include/asm-i386/cpufeature.h
@@ -0,0 +1,76 @@
+/*
+ * cpufeature.h
+ *
+ * Defines x86 CPU feature bits
+ */
+
+#ifndef __ASM_I386_CPUFEATURE_H
+#define __ASM_I386_CPUFEATURE_H
+
+/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
+#define CPU_FEATURE_P(CAP, FEATURE) test_bit(X86_FEATURE_##FEATURE, CAP)
+
+#define NCAPINTS 4 /* Currently we have 4 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */
+#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+ /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
+#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
+
+#endif /* __ASM_I386_CPUFEATURE_H */
+
+/*
+ * Local Variables:
+ * mode:c
+ * comment-column:42
+ * End:
+ */
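
cpu_has() is the intended way to test these bits against a per-CPU capability array; the sketch below assumes the usual cpuinfo_x86 structure carrying x86_capability, as in the Linux headers this file was taken from:

    /* Sketch: 'c' is assumed to carry the x86_capability word array. */
    static int has_4mb_pages(struct cpuinfo_x86 *c)
    {
        return cpu_has(c, X86_FEATURE_PSE);
    }
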
diff --git a/xen/include/asm-i386/current.h b/xen/include/asm-i386/current.h
new file mode 100644
index 0000000000..bc1496a2c9
--- /dev/null
+++ b/xen/include/asm-i386/current.h
@@ -0,0 +1,15 @@
+#ifndef _I386_CURRENT_H
+#define _I386_CURRENT_H
+
+struct task_struct;
+
+static inline struct task_struct * get_current(void)
+{
+ struct task_struct *current;
+ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
+ return current;
+}
+
+#define current get_current()
+
+#endif /* !(_I386_CURRENT_H) */
diff --git a/xen/include/asm-i386/debugreg.h b/xen/include/asm-i386/debugreg.h
new file mode 100644
index 0000000000..f0b2b06ae0
--- /dev/null
+++ b/xen/include/asm-i386/debugreg.h
@@ -0,0 +1,64 @@
+#ifndef _I386_DEBUGREG_H
+#define _I386_DEBUGREG_H
+
+
+/* Indicate the register numbers for a number of the specific
+ debug registers. Registers 0-3 contain the addresses we wish to trap on */
+#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
+#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
+
+#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
+#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
+
+/* Define a few things for the status register. We can use this to determine
+ which debugging register was responsible for the trap. The other bits
+ are either reserved or not of interest to us. */
+
+#define DR_TRAP0 (0x1) /* db0 */
+#define DR_TRAP1 (0x2) /* db1 */
+#define DR_TRAP2 (0x4) /* db2 */
+#define DR_TRAP3 (0x8) /* db3 */
+
+#define DR_STEP (0x4000) /* single-step */
+#define DR_SWITCH (0x8000) /* task switch */
+
+/* Now define a bunch of things for manipulating the control register.
+ The top two bytes of the control register consist of 4 fields of 4
+ bits - each field corresponds to one of the four debug registers,
+ and indicates what types of access we trap on, and how large the data
+ field is that we are looking at */
+
+#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
+#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
+
+#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
+#define DR_RW_WRITE (0x1)
+#define DR_RW_READ (0x3)
+
+#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
+#define DR_LEN_2 (0x4)
+#define DR_LEN_4 (0xC)
+
+/* The low byte of the control register determines which registers are
+ enabled. There are 4 fields of two bits. One bit is "local", meaning
+ that the processor will reset the bit after a task switch, and the other
+ is "global", meaning that we have to explicitly reset the bit. With Linux,
+ you can use either one, since we explicitly zero the register when we enter
+ kernel mode. */
+
+#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
+#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
+#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
+
+#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
+#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
+
+/* The second byte of the control register has a few special things.
+ We can slow the instruction pipeline for instructions coming via the
+ gdt or the ldt if we want to. I am not sure why this is an advantage */
+
+#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
+#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+
+#endif
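
The control-register layout described above composes mechanically: shift a 4-bit type/length field into the top half for the chosen debug register, and set that register's enable bit in the low byte. A hedged sketch computing a DR7 value for a 4-byte write watchpoint in debug register 0 (the watch address itself would be loaded into DR0 separately):

    /* Sketch: DR7 bits for a 4-byte write watchpoint in debug register 0. */
    static unsigned long dr7_for_write_watch0(void)
    {
        unsigned long dr7 = 0;

        /* Type/length field for register 0 starts at bit 16. */
        dr7 |= (unsigned long)(DR_RW_WRITE | DR_LEN_4)
               << (DR_CONTROL_SHIFT + 0 * DR_CONTROL_SIZE);
        /* Local-enable bit for register 0. */
        dr7 |= 1UL << (DR_LOCAL_ENABLE_SHIFT + 0 * DR_ENABLE_SIZE);
        return dr7;
    }
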
diff --git a/xen/include/asm-i386/delay.h b/xen/include/asm-i386/delay.h
new file mode 100644
index 0000000000..9e0adb4a27
--- /dev/null
+++ b/xen/include/asm-i386/delay.h
@@ -0,0 +1,14 @@
+#ifndef _I386_DELAY_H
+#define _I386_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/i386/lib/delay.c
+ */
+
+extern unsigned long ticks_per_usec;
+extern void __udelay(unsigned long usecs);
+#define udelay(n) __udelay(n)
+
+#endif /* defined(_I386_DELAY_H) */
diff --git a/xen/include/asm-i386/desc.h b/xen/include/asm-i386/desc.h
new file mode 100644
index 0000000000..2cb90769b5
--- /dev/null
+++ b/xen/include/asm-i386/desc.h
@@ -0,0 +1,32 @@
+#ifndef __ARCH_DESC_H
+#define __ARCH_DESC_H
+
+#define __FIRST_TSS_ENTRY 8
+#define __TSS(n) ((n) + __FIRST_TSS_ENTRY)
+
+#ifndef __ASSEMBLY__
+struct desc_struct {
+ unsigned long a,b;
+};
+
+extern struct desc_struct gdt_table[];
+extern struct desc_struct *idt, *gdt;
+
+struct Xgt_desc_struct {
+ unsigned short size;
+ unsigned long address __attribute__((packed));
+};
+
+#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
+#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
+
+#define load_TR(n) __asm__ __volatile__("ltr %%ax"::"a" (__TSS(n)<<3))
+
+#define __load_LDT(n) __asm__ __volatile__("lldt %%ax"::"a" ((n)<<3))
+
+extern void set_intr_gate(unsigned int irq, void * addr);
+extern void set_tss_desc(unsigned int n, void *addr);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
diff --git a/xen/include/asm-i386/dma.h b/xen/include/asm-i386/dma.h
new file mode 100644
index 0000000000..f24c90a7bd
--- /dev/null
+++ b/xen/include/asm-i386/dma.h
@@ -0,0 +1,301 @@
+/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $
+ * linux/include/asm/dma.h: Defines for using and allocating dma channels.
+ * Written by Hennus Bergman, 1992.
+ * High DMA channel support & info by Hannu Savolainen
+ * and John Boyd, Nov. 1992.
+ */
+
+#ifndef _ASM_DMA_H
+#define _ASM_DMA_H
+
+#include <linux/config.h>
+#include <linux/spinlock.h> /* And spinlocks */
+#include <asm/io.h> /* need byte IO */
+#include <linux/delay.h>
+
+
+#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
+#define dma_outb outb_p
+#else
+#define dma_outb outb
+#endif
+
+#define dma_inb inb
+
+/*
+ * NOTES about DMA transfers:
+ *
+ * controller 1: channels 0-3, byte operations, ports 00-1F
+ * controller 2: channels 4-7, word operations, ports C0-DF
+ *
+ * - ALL registers are 8 bits only, regardless of transfer size
+ * - channel 4 is not used - cascades 1 into 2.
+ * - channels 0-3 are byte - addresses/counts are for physical bytes
+ * - channels 5-7 are word - addresses/counts are for physical words
+ * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
+ * - transfer count loaded to registers is 1 less than actual count
+ * - controller 2 offsets are all even (2x offsets for controller 1)
+ * - page registers for 5-7 don't use data bit 0, represent 128K pages
+ * - page registers for 0-3 use bit 0, represent 64K pages
+ *
+ * DMA transfers are limited to the lower 16MB of _physical_ memory.
+ * Note that addresses loaded into registers must be _physical_ addresses,
+ * not logical addresses (which may differ if paging is active).
+ *
+ * Address mapping for channels 0-3:
+ *
+ * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses)
+ * | ... | | ... | | ... |
+ * | ... | | ... | | ... |
+ * | ... | | ... | | ... |
+ * P7 ... P0 A7 ... A0 A7 ... A0
+ * | Page | Addr MSB | Addr LSB | (DMA registers)
+ *
+ * Address mapping for channels 5-7:
+ *
+ * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses)
+ * | ... | \ \ ... \ \ \ ... \ \
+ * | ... | \ \ ... \ \ \ ... \ (not used)
+ * | ... | \ \ ... \ \ \ ... \
+ * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0
+ * | Page | Addr MSB | Addr LSB | (DMA registers)
+ *
+ * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
+ * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
+ * the hardware level, so odd-byte transfers aren't possible).
+ *
+ * Transfer count (_not # bytes_) is limited to 64K, represented as actual
+ * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more,
+ * and up to 128K bytes may be transferred on channels 5-7 in one operation.
+ *
+ */
+
+#define MAX_DMA_CHANNELS 8
+
+#if 0
+/* The maximum address that we can perform a DMA transfer to on this platform */
+#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000)
+#endif
+
+
+/* 8237 DMA controllers */
+#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */
+#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */
+
+/* DMA controller registers */
+#define DMA1_CMD_REG 0x08 /* command register (w) */
+#define DMA1_STAT_REG 0x08 /* status register (r) */
+#define DMA1_REQ_REG 0x09 /* request register (w) */
+#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */
+#define DMA1_MODE_REG 0x0B /* mode register (w) */
+#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */
+#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */
+#define DMA1_RESET_REG 0x0D /* Master Clear (w) */
+#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */
+#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */
+
+#define DMA2_CMD_REG 0xD0 /* command register (w) */
+#define DMA2_STAT_REG 0xD0 /* status register (r) */
+#define DMA2_REQ_REG 0xD2 /* request register (w) */
+#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */
+#define DMA2_MODE_REG 0xD6 /* mode register (w) */
+#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */
+#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */
+#define DMA2_RESET_REG 0xDA /* Master Clear (w) */
+#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */
+#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */
+
+#define DMA_ADDR_0 0x00 /* DMA address registers */
+#define DMA_ADDR_1 0x02
+#define DMA_ADDR_2 0x04
+#define DMA_ADDR_3 0x06
+#define DMA_ADDR_4 0xC0
+#define DMA_ADDR_5 0xC4
+#define DMA_ADDR_6 0xC8
+#define DMA_ADDR_7 0xCC
+
+#define DMA_CNT_0 0x01 /* DMA count registers */
+#define DMA_CNT_1 0x03
+#define DMA_CNT_2 0x05
+#define DMA_CNT_3 0x07
+#define DMA_CNT_4 0xC2
+#define DMA_CNT_5 0xC6
+#define DMA_CNT_6 0xCA
+#define DMA_CNT_7 0xCE
+
+#define DMA_PAGE_0 0x87 /* DMA page registers */
+#define DMA_PAGE_1 0x83
+#define DMA_PAGE_2 0x81
+#define DMA_PAGE_3 0x82
+#define DMA_PAGE_5 0x8B
+#define DMA_PAGE_6 0x89
+#define DMA_PAGE_7 0x8A
+
+#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */
+#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */
+#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */
+
+#define DMA_AUTOINIT 0x10
+
+
+extern spinlock_t dma_spin_lock;
+
+static __inline__ unsigned long claim_dma_lock(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&dma_spin_lock, flags);
+ return flags;
+}
+
+static __inline__ void release_dma_lock(unsigned long flags)
+{
+ spin_unlock_irqrestore(&dma_spin_lock, flags);
+}
+
+/* enable/disable a specific DMA channel */
+static __inline__ void enable_dma(unsigned int dmanr)
+{
+ if (dmanr<=3)
+ dma_outb(dmanr, DMA1_MASK_REG);
+ else
+ dma_outb(dmanr & 3, DMA2_MASK_REG);
+}
+
+static __inline__ void disable_dma(unsigned int dmanr)
+{
+ if (dmanr<=3)
+ dma_outb(dmanr | 4, DMA1_MASK_REG);
+ else
+ dma_outb((dmanr & 3) | 4, DMA2_MASK_REG);
+}
+
+/* Clear the 'DMA Pointer Flip Flop'.
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access.
+ * Use this once to initialize the FF to a known state.
+ * After that, keep track of it. :-)
+ * --- In order to do that, the DMA routines below should ---
+ * --- only be used while holding the DMA lock ! ---
+ */
+static __inline__ void clear_dma_ff(unsigned int dmanr)
+{
+ if (dmanr<=3)
+ dma_outb(0, DMA1_CLEAR_FF_REG);
+ else
+ dma_outb(0, DMA2_CLEAR_FF_REG);
+}
+
+/* set mode (above) for a specific DMA channel */
+static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
+{
+ if (dmanr<=3)
+ dma_outb(mode | dmanr, DMA1_MODE_REG);
+ else
+ dma_outb(mode | (dmanr&3), DMA2_MODE_REG);
+}
+
+/* Set only the page register bits of the transfer address.
+ * This is used for successive transfers when we know the contents of
+ * the lower 16 bits of the DMA current address register, but a 64k boundary
+ * may have been crossed.
+ */
+static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
+{
+ switch(dmanr) {
+ case 0:
+ dma_outb(pagenr, DMA_PAGE_0);
+ break;
+ case 1:
+ dma_outb(pagenr, DMA_PAGE_1);
+ break;
+ case 2:
+ dma_outb(pagenr, DMA_PAGE_2);
+ break;
+ case 3:
+ dma_outb(pagenr, DMA_PAGE_3);
+ break;
+ case 5:
+ dma_outb(pagenr & 0xfe, DMA_PAGE_5);
+ break;
+ case 6:
+ dma_outb(pagenr & 0xfe, DMA_PAGE_6);
+ break;
+ case 7:
+ dma_outb(pagenr & 0xfe, DMA_PAGE_7);
+ break;
+ }
+}
+
+
+/* Set transfer address & page bits for specific DMA channel.
+ * Assumes dma flipflop is clear.
+ */
+static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
+{
+ set_dma_page(dmanr, a>>16);
+ if (dmanr <= 3) {
+ dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
+ dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
+ } else {
+ dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
+ dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
+ }
+}
+
+
+/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
+ * a specific DMA channel.
+ * You must ensure the parameters are valid.
+ * NOTE: from a manual: "the number of transfers is one more
+ * than the initial word count"! This is taken into account.
+ * Assumes dma flip-flop is clear.
+ * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
+ */
+static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+ count--;
+ if (dmanr <= 3) {
+ dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
+ dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
+ } else {
+ dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
+ dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
+ }
+}
+
+
+/* Get DMA residue count. After a DMA transfer, this
+ * should return zero. Reading this while a DMA transfer is
+ * still in progress will return unpredictable results.
+ * If called before the channel has been used, it may return 1.
+ * Otherwise, it returns the number of _bytes_ left to transfer.
+ *
+ * Assumes DMA flip-flop is clear.
+ */
+static __inline__ int get_dma_residue(unsigned int dmanr)
+{
+ unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
+ : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
+
+ /* using short to get 16-bit wrap around */
+ unsigned short count;
+
+ count = 1 + dma_inb(io_port);
+ count += dma_inb(io_port) << 8;
+
+ return (dmanr<=3)? count : (count<<1);
+}
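
Taken together, the helpers above imply a fixed programming sequence: take the lock, mask the channel, clear the flip-flop, then load mode, address and count before unmasking. A hedged sketch for a single device-to-memory transfer, assuming the caller supplies a buffer that is physically contiguous, below 16MB, and does not cross a 64K boundary:

    /* Sketch: program channel 'dmanr' (0-3) to read 'count' bytes to 'phys'. */
    static void example_start_dma(unsigned int dmanr, unsigned int phys,
                                  unsigned int count)
    {
        unsigned long flags = claim_dma_lock();

        disable_dma(dmanr);          /* mask the channel while reprogramming */
        clear_dma_ff(dmanr);         /* byte flip-flop to a known state */
        set_dma_mode(dmanr, DMA_MODE_READ);
        set_dma_addr(dmanr, phys);   /* physical address, incl. page register */
        set_dma_count(dmanr, count); /* helper loads count - 1 as required */
        enable_dma(dmanr);           /* unmask: transfer runs on next DREQ */

        release_dma_lock(flags);
    }
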
+
+
+/* These are in kernel/dma.c: */
+extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */
+extern void free_dma(unsigned int dmanr); /* release it again */
+
+/* From PCI */
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy (0)
+#endif
+
+#endif /* _ASM_DMA_H */
diff --git a/xen/include/asm-i386/domain_page.h b/xen/include/asm-i386/domain_page.h
new file mode 100644
index 0000000000..92fb261147
--- /dev/null
+++ b/xen/include/asm-i386/domain_page.h
@@ -0,0 +1,43 @@
+/******************************************************************************
+ * domain_page.h
+ *
+ * Allow temporary mapping of domain page frames into Xen space.
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+
+extern unsigned long *mapcache[NR_CPUS];
+#define MAPCACHE_ENTRIES 1024
+
+/*
+ * Maps a given physical address, returning corresponding virtual address.
+ * The entire page containing that VA is now accessible until a
+ * corresponding call to unmap_domain_mem().
+ */
+extern void *map_domain_mem(unsigned long pa);
+
+/*
+ * Pass a VA within a page previously mapped with map_domain_mem().
+ * That page will then be removed from the mapping lists.
+ */
+extern void unmap_domain_mem(void *va);
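+
+/*
+ * Usage sketch (hypothetical caller): copy one page-aligned frame of
+ * domain memory out to a hypervisor buffer. Assumes memcpy() and
+ * PAGE_SIZE are visible via the includes above.
+ */
+static inline void example_copy_from_frame(void *dst, unsigned long pa)
+{
+    void *va = map_domain_mem(pa);  /* maps the whole containing frame */
+    memcpy(dst, va, PAGE_SIZE);     /* pa is assumed page-aligned here */
+    unmap_domain_mem(va);
+}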
+
+#if 0
+#define MAPCACHE_HASH(_pfn) ((_pfn) & (MAPCACHE_ENTRIES-1))
+static inline void *map_domain_mem(unsigned long pa)
+{
+ unsigned long pfn = pa >> PAGE_SHIFT;
+ unsigned long hash = MAPCACHE_HASH(pfn);
+ unsigned long *pent = mapcache[smp_processor_id()] + hash;
+ void *va = (void *)(MAPCACHE_VIRT_START +
+ (hash << PAGE_SHIFT) +
+ (pa & ~PAGE_MASK));
+ if ( (*pent & PAGE_MASK) != (pfn << PAGE_SHIFT) )
+ {
+ *pent = (pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR;
+ __flush_tlb_one(va);
+ }
+ return va;
+}
+#endif
diff --git a/xen/include/asm-i386/elf.h b/xen/include/asm-i386/elf.h
new file mode 100644
index 0000000000..ded22856d0
--- /dev/null
+++ b/xen/include/asm-i386/elf.h
@@ -0,0 +1,233 @@
+/*
+ * GRUB -- GRand Unified Bootloader
+ * Copyright (C) 1996 Erich Boleyn <erich@uruk.org>
+ * Copyright (C) 2001 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* 32-bit data types */
+
+typedef unsigned long Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned long Elf32_Off;
+typedef signed long Elf32_Sword;
+typedef unsigned long Elf32_Word;
+/* "unsigned char" already exists */
+
+/* ELF header */
+typedef struct
+{
+
+#define EI_NIDENT 16
+
+ /* first four characters are defined below */
+#define EI_MAG0 0
+#define ELFMAG0 0x7f
+#define EI_MAG1 1
+#define ELFMAG1 'E'
+#define EI_MAG2 2
+#define ELFMAG2 'L'
+#define EI_MAG3 3
+#define ELFMAG3 'F'
+
+#define EI_CLASS 4 /* data sizes */
+#define ELFCLASS32 1 /* i386 -- up to 32-bit data sizes present */
+
+#define EI_DATA 5 /* data type and ordering */
+#define ELFDATA2LSB 1 /* i386 -- LSB 2's complement */
+
+#define EI_VERSION 6 /* version number. "e_version" must be the same */
+#define EV_CURRENT 1 /* current version number */
+
+#define EI_PAD 7 /* from here in is just padding */
+
+#define EI_BRAND 8 /* start of OS branding (this
+ obviously violates the ELF
+ standard) */
+
+ unsigned char e_ident[EI_NIDENT]; /* basic identification block */
+
+#define ET_EXEC 2 /* we only care about executable types */
+ Elf32_Half e_type; /* file types */
+
+#define EM_386 3 /* i386 -- obviously use this one */
+ Elf32_Half e_machine; /* machine types */
+ Elf32_Word e_version; /* use same as "EI_VERSION" above */
+ Elf32_Addr e_entry; /* entry point of the program */
+ Elf32_Off e_phoff; /* program header table file offset */
+ Elf32_Off e_shoff; /* section header table file offset */
+ Elf32_Word e_flags; /* flags */
+ Elf32_Half e_ehsize; /* elf header size in bytes */
+ Elf32_Half e_phentsize; /* program header entry size */
+ Elf32_Half e_phnum; /* number of entries in program header */
+ Elf32_Half e_shentsize; /* section header entry size */
+ Elf32_Half e_shnum; /* number of entries in section header */
+
+#define SHN_UNDEF 0
+#define SHN_LORESERVE 0xff00
+#define SHN_LOPROC 0xff00
+#define SHN_HIPROC 0xff1f
+#define SHN_ABS 0xfff1
+#define SHN_COMMON 0xfff2
+#define SHN_HIRESERVE 0xffff
+ Elf32_Half e_shstrndx; /* section header string table index */
+}
+Elf32_Ehdr;
+
+
+#define BOOTABLE_I386_ELF(h) \
+ ((h.e_ident[EI_MAG0] == ELFMAG0) & (h.e_ident[EI_MAG1] == ELFMAG1) \
+ & (h.e_ident[EI_MAG2] == ELFMAG2) & (h.e_ident[EI_MAG3] == ELFMAG3) \
+ & (h.e_ident[EI_CLASS] == ELFCLASS32) & (h.e_ident[EI_DATA] == ELFDATA2LSB) \
+ & (h.e_ident[EI_VERSION] == EV_CURRENT) & (h.e_type == ET_EXEC) \
+ & (h.e_machine == EM_386) & (h.e_version == EV_CURRENT))
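+
+/*
+ * Usage sketch (hypothetical loader): validate a header that has been
+ * read from an image before trusting e_entry/e_phoff.
+ */
+static inline int example_image_bootable(Elf32_Ehdr *ehdr)
+{
+    return BOOTABLE_I386_ELF((*ehdr)); /* non-zero for a bootable image */
+}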
+
+/* section header table entry */
+typedef struct
+{
+ Elf32_Word sh_name; /* Section name (string tbl index) */
+ Elf32_Word sh_type; /* Section type */
+ Elf32_Word sh_flags; /* Section flags */
+ Elf32_Addr sh_addr; /* Section virtual addr at execution */
+ Elf32_Off sh_offset; /* Section file offset */
+ Elf32_Word sh_size; /* Section size in bytes */
+ Elf32_Word sh_link; /* Link to another section */
+ Elf32_Word sh_info; /* Additional section information */
+ Elf32_Word sh_addralign; /* Section alignment */
+ Elf32_Word sh_entsize; /* Entry size if section holds table */
+}
+Elf32_Shdr;
+
+/* symbol table - page 4-25, figure 4-15 */
+typedef struct
+{
+ Elf32_Word st_name;
+ Elf32_Addr st_value;
+ Elf32_Word st_size;
+ unsigned char st_info;
+ unsigned char st_other;
+ Elf32_Half st_shndx;
+}
+Elf32_Sym;
+
+/* symbol type and binding attributes - page 4-26 */
+
+#define ELF32_ST_BIND(i) ((i) >> 4)
+#define ELF32_ST_TYPE(i) ((i) & 0xf)
+#define ELF32_ST_INFO(b,t) (((b)<<4)+((t)&0xf))
+
+/* symbol binding - page 4-26, figure 4-16 */
+
+#define STB_LOCAL 0
+#define STB_GLOBAL 1
+#define STB_WEAK 2
+#define STB_LOPROC 13
+#define STB_HIPROC 15
+
+/* symbol types - page 4-28, figure 4-17 */
+
+#define STT_NOTYPE 0
+#define STT_OBJECT 1
+#define STT_FUNC 2
+#define STT_SECTION 3
+#define STT_FILE 4
+#define STT_LOPROC 13
+#define STT_HIPROC 15
+
+
+/* Macros to split/combine relocation type and symbol page 4-32 */
+
+#define ELF32_R_SYM(__i) ((__i)>>8)
+#define ELF32_R_TYPE(__i) ((unsigned char) (__i))
+#define ELF32_R_INFO(__s, __t) (((__s)<<8) + (unsigned char) (__t))
+
+
+/* program header - page 5-2, figure 5-1 */
+
+typedef struct
+{
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+}
+Elf32_Phdr;
+
+/* segment types - page 5-3, figure 5-2 */
+
+#define PT_NULL 0
+#define PT_LOAD 1
+#define PT_DYNAMIC 2
+#define PT_INTERP 3
+#define PT_NOTE 4
+#define PT_SHLIB 5
+#define PT_PHDR 6
+
+#define PT_LOPROC 0x70000000
+#define PT_HIPROC 0x7fffffff
+
+/* segment permissions - page 5-6 */
+
+#define PF_X 0x1
+#define PF_W 0x2
+#define PF_R 0x4
+#define PF_MASKPROC 0xf0000000
+
+
+/* dynamic structure - page 5-15, figure 5-9 */
+
+typedef struct
+{
+ Elf32_Sword d_tag;
+ union
+ {
+ Elf32_Word d_val;
+ Elf32_Addr d_ptr;
+ }
+ d_un;
+}
+Elf32_Dyn;
+
+/* Dynamic array tags - page 5-16, figure 5-10. */
+
+#define DT_NULL 0
+#define DT_NEEDED 1
+#define DT_PLTRELSZ 2
+#define DT_PLTGOT 3
+#define DT_HASH 4
+#define DT_STRTAB 5
+#define DT_SYMTAB 6
+#define DT_RELA 7
+#define DT_RELASZ 8
+#define DT_RELAENT 9
+#define DT_STRSZ 10
+#define DT_SYMENT 11
+#define DT_INIT 12
+#define DT_FINI 13
+#define DT_SONAME 14
+#define DT_RPATH 15
+#define DT_SYMBOLIC 16
+#define DT_REL 17
+#define DT_RELSZ 18
+#define DT_RELENT 19
+#define DT_PLTREL 20
+#define DT_DEBUG 21
+#define DT_TEXTREL 22
+#define DT_JMPREL 23
diff --git a/xen/include/asm-i386/fixmap.h b/xen/include/asm-i386/fixmap.h
new file mode 100644
index 0000000000..b0f455a5af
--- /dev/null
+++ b/xen/include/asm-i386/fixmap.h
@@ -0,0 +1,107 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <xeno/config.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also this lets us do fail-safe vmalloc(); we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages (or larger if used with an increment
+ * higher than 1). Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+
+/*
+ * on UP currently we will have no trace of the fixmap mechanism,
+ * no page table allocations, etc. This might change in the
+ * future, say framebuffers for the console driver(s) could be
+ * fix-mapped?
+ */
+enum fixed_addresses {
+#ifdef CONFIG_X86_LOCAL_APIC
+ FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP and UP alike */
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ FIX_IO_APIC_BASE_0,
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+#endif
+#ifdef CONFIG_HIGHMEM
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+ __end_of_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+ l1_pgentry_t entry);
+
+#define set_fixmap(idx, phys) \
+ __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR))
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+ __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE))
+/*
+ * used by vmalloc.c.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap, and leave one page empty
+ * at the top of mem..
+ */
+#define FIXADDR_TOP (0xffffe000UL)
+#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+ /*
+ * this branch gets completely eliminated after inlining,
+ * except when someone tries to use fixaddr indices in an
+ * illegal way. (such as mixing up address types or using
+ * out-of-range indices).
+ *
+ * If it doesn't get removed, the linker will complain
+ * loudly with a reasonably clear error message..
+ */
+ if (idx >= __end_of_fixed_addresses)
+ __this_fixmap_does_not_exist();
+
+ return __fix_to_virt(idx);
+}
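+
+/*
+ * Usage sketch: give the local APIC a fixed, uncached virtual address.
+ * APIC_DEFAULT_PHYS_BASE is assumed to come from <asm/apicdef.h>,
+ * included above; real setup code may differ.
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+static inline unsigned long example_map_lapic(void)
+{
+    set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
+    return fix_to_virt(FIX_APIC_BASE); /* constant-folded when inlined */
+}
+#endif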
+
+#endif
diff --git a/xen/include/asm-i386/flushtlb.h b/xen/include/asm-i386/flushtlb.h
new file mode 100644
index 0000000000..306839c6a4
--- /dev/null
+++ b/xen/include/asm-i386/flushtlb.h
@@ -0,0 +1,48 @@
+/******************************************************************************
+ * flushtlb.h
+ *
+ * TLB flush macros that count flushes. Counting is used to enforce
+ * zero-copy safety, particularly for the network code.
+ *
+ * akw - Jan 21, 2003
+ */
+
+#ifndef __FLUSHTLB_H
+#define __FLUSHTLB_H
+
+#include <xeno/smp.h>
+
+unsigned long tlb_flush_count[NR_CPUS];
+#define __read_cr3(__var) \
+ do { \
+ __asm__ __volatile ( \
+ "movl %%cr3, %0;" \
+ : "=r" (__var)); \
+ } while (0)
+
+#define __write_cr3_counted(__pa) \
+ do { \
+ __asm__ __volatile__ ( \
+ "movl %0, %%cr3;" \
+ :: "r" (__pa) \
+ : "memory"); \
+ tlb_flush_count[smp_processor_id()]++; \
+ } while (0)
+
+#define __flush_tlb_counted() \
+ do { \
+ unsigned int tmpreg; \
+ \
+ __asm__ __volatile__( \
+ "movl %%cr3, %0; # flush TLB \n" \
+ "movl %0, %%cr3; " \
+ : "=r" (tmpreg) \
+ :: "memory"); \
+ tlb_flush_count[smp_processor_id()]++; \
+ } while (0)
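+
+/*
+ * Usage sketch: zero-copy code snapshots the per-CPU count when it tears
+ * down a mapping, and can later tell whether a flush has happened since.
+ */
+static inline int example_flushed_since(unsigned long stamp)
+{
+    return tlb_flush_count[smp_processor_id()] != stamp;
+}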
+
+#endif
+
diff --git a/xen/include/asm-i386/hardirq.h b/xen/include/asm-i386/hardirq.h
new file mode 100644
index 0000000000..bad529b882
--- /dev/null
+++ b/xen/include/asm-i386/hardirq.h
@@ -0,0 +1,88 @@
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <xeno/config.h>
+#include <xeno/irq.h>
+
+/* assembly code in softirq.h is sensitive to the offsets of these fields */
+typedef struct {
+ unsigned int __softirq_pending;
+ unsigned int __local_irq_count;
+ unsigned int __local_bh_count;
+ unsigned int __syscall_count;
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <xeno/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
+
+/*
+ * Are we in an interrupt context? Either doing bottom half
+ * or hardware interrupt processing?
+ */
+#define in_interrupt() ({ int __cpu = smp_processor_id(); \
+ (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })
+
+#define in_irq() (local_irq_count(smp_processor_id()) != 0)
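+
+/*
+ * Usage sketch: code that must not run in interrupt context can assert
+ * it, e.g.
+ *   if ( in_interrupt() ) BUG();
+ * before taking a path that may block.
+ */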
+
+#ifndef CONFIG_SMP
+
+#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0)
+#define hardirq_endlock(cpu) do { } while (0)
+
+#define irq_enter(cpu, irq) (local_irq_count(cpu)++)
+#define irq_exit(cpu, irq) (local_irq_count(cpu)--)
+
+#define synchronize_irq() barrier()
+
+#else
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+
+extern unsigned char global_irq_holder;
+extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */
+
+static inline int irqs_running (void)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; i++)
+ if (local_irq_count(i))
+ return 1;
+ return 0;
+}
+
+static inline void release_irqlock(int cpu)
+{
+ /* if we didn't own the irq lock, just ignore.. */
+ if (global_irq_holder == (unsigned char) cpu) {
+ global_irq_holder = 0xff;
+ clear_bit(0,&global_irq_lock);
+ }
+}
+
+static inline void irq_enter(int cpu, int irq)
+{
+ ++local_irq_count(cpu);
+
+ while (test_bit(0,&global_irq_lock)) {
+ cpu_relax();
+ }
+}
+
+static inline void irq_exit(int cpu, int irq)
+{
+ --local_irq_count(cpu);
+}
+
+static inline int hardirq_trylock(int cpu)
+{
+ return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock);
+}
+
+#define hardirq_endlock(cpu) do { } while (0)
+
+extern void synchronize_irq(void);
+
+#endif /* CONFIG_SMP */
+
+#endif /* __ASM_HARDIRQ_H */
diff --git a/xen/include/asm-i386/hdreg.h b/xen/include/asm-i386/hdreg.h
new file mode 100644
index 0000000000..1ad5c07394
--- /dev/null
+++ b/xen/include/asm-i386/hdreg.h
@@ -0,0 +1,12 @@
+/*
+ * linux/include/asm-i386/hdreg.h
+ *
+ * Copyright (C) 1994-1996 Linus Torvalds & authors
+ */
+
+#ifndef __ASMi386_HDREG_H
+#define __ASMi386_HDREG_H
+
+typedef unsigned short ide_ioreg_t;
+
+#endif /* __ASMi386_HDREG_H */
diff --git a/xen/include/asm-i386/i387.h b/xen/include/asm-i386/i387.h
new file mode 100644
index 0000000000..7ec679d446
--- /dev/null
+++ b/xen/include/asm-i386/i387.h
@@ -0,0 +1,39 @@
+/*
+ * include/asm-i386/i387.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#ifndef __ASM_I386_I387_H
+#define __ASM_I386_I387_H
+
+#include <xeno/sched.h>
+#include <asm/processor.h>
+
+extern void init_fpu(void);
+extern void save_init_fpu( struct task_struct *tsk );
+extern void restore_fpu( struct task_struct *tsk );
+
+#define unlazy_fpu( tsk ) do { \
+ if ( tsk->flags & PF_USEDFPU ) \
+ save_init_fpu( tsk ); \
+} while (0)
+
+#define clear_fpu( tsk ) do { \
+ if ( tsk->flags & PF_USEDFPU ) { \
+ asm volatile("fwait"); \
+ tsk->flags &= ~PF_USEDFPU; \
+ stts(); \
+ } \
+} while (0)
+
+#define load_mxcsr( val ) do { \
+ unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
+ asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
+} while (0)
+
+#endif /* __ASM_I386_I387_H */
diff --git a/xen/include/asm-i386/ide.h b/xen/include/asm-i386/ide.h
new file mode 100644
index 0000000000..6642abf467
--- /dev/null
+++ b/xen/include/asm-i386/ide.h
@@ -0,0 +1,128 @@
+/*
+ * linux/include/asm-i386/ide.h
+ *
+ * Copyright (C) 1994-1996 Linus Torvalds & authors
+ */
+
+/*
+ * This file contains the i386 architecture specific IDE code.
+ */
+
+#ifndef __ASMi386_IDE_H
+#define __ASMi386_IDE_H
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+
+#ifndef MAX_HWIFS
+# ifdef CONFIG_BLK_DEV_IDEPCI
+#define MAX_HWIFS 10
+# else
+#define MAX_HWIFS 6
+# endif
+#endif
+
+#define ide__sti() __sti()
+
+static __inline__ int ide_default_irq(ide_ioreg_t base)
+{
+ switch (base) {
+ case 0x1f0: return 14;
+ case 0x170: return 15;
+ case 0x1e8: return 11;
+ case 0x168: return 10;
+ case 0x1e0: return 8;
+ case 0x160: return 12;
+ default:
+ return 0;
+ }
+}
+
+static __inline__ ide_ioreg_t ide_default_io_base(int index)
+{
+ switch (index) {
+ case 0: return 0x1f0;
+ case 1: return 0x170;
+ case 2: return 0x1e8;
+ case 3: return 0x168;
+ case 4: return 0x1e0;
+ case 5: return 0x160;
+ default:
+ return 0;
+ }
+}
+
+static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq)
+{
+ ide_ioreg_t reg = data_port;
+ int i;
+
+ for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+ hw->io_ports[i] = reg;
+ reg += 1;
+ }
+ if (ctrl_port) {
+ hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+ } else {
+ hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206;
+ }
+ if (irq != NULL)
+ *irq = 0;
+ hw->io_ports[IDE_IRQ_OFFSET] = 0;
+}
+
+static __inline__ void ide_init_default_hwifs(void)
+{
+#ifndef CONFIG_BLK_DEV_IDEPCI
+ hw_regs_t hw;
+ int index;
+
+ for(index = 0; index < MAX_HWIFS; index++) {
+ ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
+ hw.irq = ide_default_irq(ide_default_io_base(index));
+ ide_register_hw(&hw, NULL);
+ }
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+}
+
+typedef union {
+ unsigned all : 8; /* all of the bits together */
+ struct {
+ unsigned head : 4; /* always zeros here */
+ unsigned unit : 1; /* drive select number, 0 or 1 */
+ unsigned bit5 : 1; /* always 1 */
+ unsigned lba : 1; /* using LBA instead of CHS */
+ unsigned bit7 : 1; /* always 1 */
+ } b;
+} select_t;
+
+typedef union {
+ unsigned all : 8; /* all of the bits together */
+ struct {
+ unsigned bit0 : 1;
+ unsigned nIEN : 1; /* device INTRQ to host */
+ unsigned SRST : 1; /* host soft reset bit */
+ unsigned bit3 : 1; /* ATA-2 thingy */
+ unsigned reserved456 : 3;
+ unsigned HOB : 1; /* 48-bit address ordering */
+ } b;
+} control_t;
+
+#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id))
+#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id))
+#define ide_check_region(from,extent) check_region((from), (extent))
+#define ide_request_region(from,extent,name) request_region((from), (extent), (name))
+#define ide_release_region(from,extent) release_region((from), (extent))
+
+/*
+ * The following are not needed for the non-m68k ports
+ */
+#define ide_ack_intr(hwif) (1)
+#define ide_fix_driveid(id) do {} while (0)
+#define ide_release_lock(lock) do {} while (0)
+#define ide_get_lock(lock, hdlr, data) do {} while (0)
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASMi386_IDE_H */
diff --git a/xen/include/asm-i386/io.h b/xen/include/asm-i386/io.h
new file mode 100644
index 0000000000..9b54ae278d
--- /dev/null
+++ b/xen/include/asm-i386/io.h
@@ -0,0 +1,253 @@
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <xeno/config.h>
+#include <asm/page.h>
+
+#define IO_SPACE_LIMIT 0xffff
+
+/*#include <linux/vmalloc.h>*/
+
+/*
+ * Temporary debugging check to catch old code using
+ * unmapped ISA addresses. Will be removed in 2.4.
+ */
+#if CONFIG_DEBUG_IOVIRT
+ extern void *__io_virt_debug(unsigned long x, const char *file, int line);
+ extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line);
+ #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__)
+//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__)
+#else
+ #define __io_virt(x) ((void *)(x))
+//#define __io_phys(x) __pa(x)
+#endif
+
+/*
+ * Change virtual addresses to physical addresses and vice versa.
+ * These are pretty trivial on x86.
+ */
+static inline unsigned long virt_to_phys(volatile void * address)
+{
+ return __pa(address);
+}
+
+static inline void * phys_to_virt(unsigned long address)
+{
+ return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_phys(page) (((page) - frame_table) << PAGE_SHIFT)
+
+extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
+
+static inline void * ioremap (unsigned long offset, unsigned long size)
+{
+ return __ioremap(offset, size, 0);
+}
+
+/*
+ * This one maps high-address device memory and turns off caching for that area.
+ * It's useful if some control registers are in such an area and write-combining
+ * or read caching is not desirable:
+ */
+static inline void * ioremap_nocache (unsigned long offset, unsigned long size)
+{
+ return __ioremap(offset, size, _PAGE_PCD);
+}
+
+extern void iounmap(void *addr);
+
+/*
+ * IO bus memory addresses are also 1:1 with the physical address
+ */
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+#define page_to_bus page_to_phys
+
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the x86 architecture, we just read/write the
+ * memory location directly.
+ */
+
+#define readb(addr) (*(volatile unsigned char *) __io_virt(addr))
+#define readw(addr) (*(volatile unsigned short *) __io_virt(addr))
+#define readl(addr) (*(volatile unsigned int *) __io_virt(addr))
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b))
+#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b))
+#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b))
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+#define memset_io(a,b,c) memset(__io_virt(a),(b),(c))
+#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c))
+#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c))
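+
+/*
+ * Usage sketch (hypothetical device): map a memory-mapped register
+ * window and access a 32-bit register. The physical address and the
+ * register layout are made up.
+ */
+static inline unsigned int example_read_reg0(void)
+{
+    void *regs = ioremap_nocache(0xfebff000, PAGE_SIZE);
+    unsigned int v;
+
+    if (regs == NULL)
+        return 0;
+    v = readl(regs);     /* register at offset 0 */
+    writel(v | 1, regs); /* set a hypothetical enable bit */
+    iounmap(regs);
+    return v;
+}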
+
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI mappings holds quite well):
+ */
+#define __ISA_IO_base ((char *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
+#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
+#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
+
+
+/*
+ * Again, i386 does not require memory-IO-specific functions.
+ */
+
+#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d))
+#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d))
+
+static inline int check_signature(unsigned long io_addr,
+ const unsigned char *signature, int length)
+{
+ int retval = 0;
+ do {
+ if (readb(io_addr) != *signature)
+ goto out;
+ io_addr++;
+ signature++;
+ length--;
+ } while (length);
+ retval = 1;
+out:
+ return retval;
+}
+
+static inline int isa_check_signature(unsigned long io_addr,
+ const unsigned char *signature, int length)
+{
+ int retval = 0;
+ do {
+ if (isa_readb(io_addr) != *signature)
+ goto out;
+ io_addr++;
+ signature++;
+ length--;
+ } while (length);
+ retval = 1;
+out:
+ return retval;
+}
+
+/*
+ * Cache management
+ *
+ * This needed for two cases
+ * 1. Out of order aware processors
+ * 2. Accidentally out of order processors (PPro errata #51)
+ */
+
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+static inline void flush_write_buffers(void)
+{
+ __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
+}
+
+#define dma_cache_inv(_start,_size) flush_write_buffers()
+#define dma_cache_wback(_start,_size) flush_write_buffers()
+#define dma_cache_wback_inv(_start,_size) flush_write_buffers()
+
+#else
+
+/* Nothing to do */
+
+#define dma_cache_inv(_start,_size) do { } while (0)
+#define dma_cache_wback(_start,_size) do { } while (0)
+#define dma_cache_wback_inv(_start,_size) do { } while (0)
+#define flush_write_buffers()
+
+#endif
+
+#ifdef SLOW_IO_BY_JUMPING
+#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
+#else
+#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+#endif
+
+#ifdef REALLY_SLOW_IO
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+#else
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+#endif
+
+
+/*
+ * Talk about misusing macros..
+ */
+#define __OUT1(s,x) \
+static inline void out##s(unsigned x value, unsigned short port) {
+
+#define __OUT2(s,s1,s2) \
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+
+#define __OUT(s,s1,x) \
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));}
+
+#define __IN1(s) \
+static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
+
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; }
+
+#define __INS(s) \
+static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; ins" #s \
+: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define __OUTS(s) \
+static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; outs" #s \
+: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define RETURN_TYPE unsigned char
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
+
+__INS(b)
+__INS(w)
+__INS(l)
+
+__OUTS(b)
+__OUTS(w)
+__OUTS(l)
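+
+/*
+ * The expansions above generate the usual port-IO families:
+ *   inb/inw/inl and inb_p/inw_p/inl_p (the _p forms add an IO delay),
+ *   outb/outw/outl and outb_p/outw_p/outl_p,
+ *   insb/insw/insl and outsb/outsw/outsl for string transfers.
+ * e.g. outb(0, 0x80) writes one byte to the POST diagnostic port.
+ */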
+
+#endif
diff --git a/xen/include/asm-i386/io_apic.h b/xen/include/asm-i386/io_apic.h
new file mode 100644
index 0000000000..44916209a8
--- /dev/null
+++ b/xen/include/asm-i386/io_apic.h
@@ -0,0 +1,148 @@
+#ifndef __ASM_IO_APIC_H
+#define __ASM_IO_APIC_H
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+
+/*
+ * Intel IO-APIC support for SMP and UP systems.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
+ */
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define APIC_MISMATCH_DEBUG
+
+#define IO_APIC_BASE(idx) \
+ ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
+
+/*
+ * The structure of the IO-APIC:
+ */
+struct IO_APIC_reg_00 {
+ __u32 __reserved_2 : 24,
+ ID : 4,
+ __reserved_1 : 4;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_01 {
+ __u32 version : 8,
+ __reserved_2 : 7,
+ PRQ : 1,
+ entries : 8,
+ __reserved_1 : 8;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_02 {
+ __u32 __reserved_2 : 24,
+ arbitration : 4,
+ __reserved_1 : 4;
+} __attribute__ ((packed));
+
+/*
+ * # of IO-APICs and # of IRQ routing registers
+ */
+extern int nr_ioapics;
+extern int nr_ioapic_registers[MAX_IO_APICS];
+
+enum ioapic_irq_destination_types {
+ dest_Fixed = 0,
+ dest_LowestPrio = 1,
+ dest_SMI = 2,
+ dest__reserved_1 = 3,
+ dest_NMI = 4,
+ dest_INIT = 5,
+ dest__reserved_2 = 6,
+ dest_ExtINT = 7
+};
+
+struct IO_APIC_route_entry {
+ __u32 vector : 8,
+ delivery_mode : 3, /* 000: FIXED
+ * 001: lowest prio
+ * 111: ExtINT
+ */
+ dest_mode : 1, /* 0: physical, 1: logical */
+ delivery_status : 1,
+ polarity : 1,
+ irr : 1,
+ trigger : 1, /* 0: edge, 1: level */
+ mask : 1, /* 0: enabled, 1: disabled */
+ __reserved_2 : 15;
+
+ union { struct { __u32
+ __reserved_1 : 24,
+ physical_dest : 4,
+ __reserved_2 : 4;
+ } physical;
+
+ struct { __u32
+ __reserved_1 : 24,
+ logical_dest : 8;
+ } logical;
+ } dest;
+
+} __attribute__ ((packed));
+
+/*
+ * MP-BIOS irq configuration table structures:
+ */
+
+/* I/O APIC entries */
+extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+extern int mp_irq_entries;
+
+/* MP IRQ source entries */
+extern struct mpc_config_intsrc *mp_irqs;
+
+/* non-0 if default (table-less) MP configuration */
+extern int mpc_default_type;
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+ *IO_APIC_BASE(apic) = reg;
+ return *(IO_APIC_BASE(apic)+4);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
+{
+ *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+ (void) *(IO_APIC_BASE(apic)+4);
+}
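+
+/*
+ * Usage sketch: mask redirection entry `pin' on IO-APIC `apic' with the
+ * read-modify-write pattern (io_apic_read() sets up the index register,
+ * io_apic_modify() reuses it). Redirection entries start at register
+ * 0x10, two registers per entry; bit 16 of the low word is the mask.
+ * Callers are assumed to hold the appropriate interrupt lock.
+ */
+static inline void example_mask_ioapic_pin(unsigned int apic, unsigned int pin)
+{
+    unsigned int low = io_apic_read(apic, 0x10 + 2 * pin);
+    io_apic_modify(apic, low | 0x00010000);
+    io_apic_sync(apic);
+}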
+
+/* 1 if "noapic" boot option passed */
+extern int skip_ioapic_setup;
+
+/*
+ * If we use the IO-APIC for IRQ routing, disable automatic
+ * assignment of PCI IRQ's.
+ */
+#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup)
+
+#else /* !CONFIG_X86_IO_APIC */
+#define io_apic_assign_pci_irqs 0
+#endif
+
+#endif
diff --git a/xen/include/asm-i386/ioctl.h b/xen/include/asm-i386/ioctl.h
new file mode 100644
index 0000000000..c75f20ade6
--- /dev/null
+++ b/xen/include/asm-i386/ioctl.h
@@ -0,0 +1,75 @@
+/* $Id: ioctl.h,v 1.5 1993/07/19 21:53:50 root Exp root $
+ *
+ * linux/ioctl.h for Linux by H.H. Bergman.
+ */
+
+#ifndef _ASMI386_IOCTL_H
+#define _ASMI386_IOCTL_H
+
+/* ioctl command encoding: 32 bits total, command in lower 16 bits,
+ * size of the parameter structure in the lower 14 bits of the
+ * upper 16 bits.
+ * Encoding the size of the parameter structure in the ioctl request
+ * is useful for catching programs compiled with old versions
+ * and to avoid overwriting user space outside the user buffer area.
+ * The highest 2 bits are reserved for indicating the ``access mode''.
+ * NOTE: This limits the max parameter size to 16kB -1 !
+ */
+
+/*
+ * The following is for compatibility across the various Linux
+ * platforms. The i386 ioctl numbering scheme doesn't really enforce
+ * a type field. De facto, however, the top 8 bits of the lower 16
+ * bits are indeed used as a type field, so we might just as well make
+ * this explicit here. Please be sure to use the decoding macros
+ * below from now on.
+ */
+#define _IOC_NRBITS 8
+#define _IOC_TYPEBITS 8
+#define _IOC_SIZEBITS 14
+#define _IOC_DIRBITS 2
+
+#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT 0
+#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE 0U
+#define _IOC_WRITE 1U
+#define _IOC_READ 2U
+
+#define _IOC(dir,type,nr,size) \
+ (((dir) << _IOC_DIRSHIFT) | \
+ ((type) << _IOC_TYPESHIFT) | \
+ ((nr) << _IOC_NRSHIFT) | \
+ ((size) << _IOC_SIZESHIFT))
+
+/* used to create numbers */
+#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
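+
+/*
+ * Example (hypothetical driver): a command defined as
+ *   #define XDEV_GET_COUNT _IOR('X', 1, int)
+ * decodes as _IOC_DIR() == _IOC_READ, _IOC_TYPE() == 'X',
+ * _IOC_NR() == 1 and _IOC_SIZE() == sizeof(int).
+ */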
+
+/* used to decode ioctl numbers.. */
+#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
+
+/* ...and for the drivers/sound files... */
+
+#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT)
+#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT)
+#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
+#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT)
+#define IOCSIZE_SHIFT (_IOC_SIZESHIFT)
+
+#endif /* _ASMI386_IOCTL_H */
diff --git a/xen/include/asm-i386/irq.h b/xen/include/asm-i386/irq.h
new file mode 100644
index 0000000000..41049e71b2
--- /dev/null
+++ b/xen/include/asm-i386/irq.h
@@ -0,0 +1,203 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */
+
+#include <xeno/config.h>
+#include <asm/atomic.h>
+
+#define SA_INTERRUPT 0x20000000
+#define SA_SHIRQ 0x04000000
+
+#define SA_SAMPLE_RANDOM 0 /* Linux driver compatibility */
+
+#define TIMER_IRQ 0
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x30:
+ */
+#define FIRST_EXTERNAL_VECTOR 0x30
+
+#define NR_IRQS (256 - FIRST_EXTERNAL_VECTOR)
+
+#define HYPERVISOR_CALL_VECTOR 0x82
+
+/*
+ * Vectors 0x30-0x3f are used for ISA interrupts.
+ */
+
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ * some of the following vectors are 'rare', they are merged
+ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ * TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ * Vectors 0xf0-0xfa are free (reserved for future Linux use).
+ */
+#define SPURIOUS_APIC_VECTOR 0xff
+#define ERROR_APIC_VECTOR 0xfe
+#define INVALIDATE_TLB_VECTOR 0xfd
+#define EVENT_CHECK_VECTOR 0xfc
+#define CALL_FUNCTION_VECTOR 0xfb
+#define KDB_VECTOR 0xfa
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+
+/*
+ * First APIC vector available to drivers: (vectors 0x40-0xee)
+ * we start at 0x41 to spread out vectors evenly between priority
+ * levels. (0x82 is the hypervisor call vector)
+ */
+#define FIRST_DEVICE_VECTOR 0x41
+#define FIRST_SYSTEM_VECTOR 0xef
+
+extern int irq_vector[NR_IRQS];
+#define IO_APIC_VECTOR(irq) irq_vector[irq]
+
+/*
+ * Various low-level irq details needed by irq.c, process.c,
+ * time.c, io_apic.c and smp.c
+ *
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+extern void mask_irq(unsigned int irq);
+extern void unmask_irq(unsigned int irq);
+extern void disable_8259A_irq(unsigned int irq);
+extern void enable_8259A_irq(unsigned int irq);
+extern int i8259A_irq_pending(unsigned int irq);
+extern void make_8259A_irq(unsigned int irq);
+extern void init_8259A(int aeoi);
+extern void FASTCALL(send_IPI_self(int vector));
+extern void init_VISWS_APIC_irqs(void);
+extern void setup_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void print_IO_APIC(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern void send_IPI(int dest, int vector);
+
+extern unsigned long io_apic_irqs;
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+extern char _stext, _etext;
+
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+#define SAVE_ALL \
+ "cld\n\t" \
+ "pushl %es\n\t" \
+ "pushl %ds\n\t" \
+ "pushl %eax\n\t" \
+ "pushl %ebp\n\t" \
+ "pushl %edi\n\t" \
+ "pushl %esi\n\t" \
+ "pushl %edx\n\t" \
+ "pushl %ecx\n\t" \
+ "pushl %ebx\n\t" \
+ "movl $" STR(__HYPERVISOR_DS) ",%edx\n\t" \
+ "movl %edx,%ds\n\t" \
+ "movl %edx,%es\n\t"
+
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+#define GET_CURRENT \
+ "movl %esp, %ebx\n\t" \
+ "andl $-8192, %ebx\n\t"
+
+/*
+ * SMP has a few special interrupts for IPI messages
+ */
+
+ /* There is a second layer of macros just to get the symbolic
+ name for the vector evaluated. This change is for RTLinux. */
+#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
+#define XBUILD_SMP_INTERRUPT(x,v)\
+asmlinkage void x(void); \
+asmlinkage void call_##x(void); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(x) ":\n\t" \
+ "pushl $"#v"-256\n\t" \
+ SAVE_ALL \
+ SYMBOL_NAME_STR(call_##x)":\n\t" \
+ "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
+#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
+asmlinkage void x(struct pt_regs * regs); \
+asmlinkage void call_##x(void); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(x) ":\n\t" \
+ "pushl $"#v"-256\n\t" \
+ SAVE_ALL \
+ "movl %esp,%eax\n\t" \
+ "pushl %eax\n\t" \
+ SYMBOL_NAME_STR(call_##x)":\n\t" \
+ "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
+ "addl $4,%esp\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_COMMON_IRQ() \
+asmlinkage void call_do_IRQ(void); \
+__asm__( \
+ "\n" __ALIGN_STR"\n" \
+ "common_interrupt:\n\t" \
+ SAVE_ALL \
+ SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \
+ "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \
+ "jmp ret_from_intr\n");
+
+/*
+ * Subtle: orig_eax is used by the signal code to distinguish between
+ * system calls and interrupted 'random user-space'. Thus we have
+ * to put a negative value into orig_eax here. (the problem is that
+ * both system calls and IRQs want to have small integer numbers in
+ * orig_eax, and the syscall code has won the optimization conflict ;)
+ *
+ * Subtle as a pig's ear. VY
+ */
+
+#define BUILD_IRQ(nr) \
+asmlinkage void IRQ_NAME(nr); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
+ "pushl $"#nr"-256\n\t" \
+ "jmp common_interrupt");
+
+extern unsigned long prof_cpu_mask;
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+#include <xeno/irq.h>
+
+#ifdef CONFIG_SMP /* more of this file should probably be #ifdef'ed for SMP */
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
+ if (IO_APIC_IRQ(i))
+ send_IPI_self(IO_APIC_VECTOR(i));
+}
+#else
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
+#endif
+
+#endif /* _ASM_HW_IRQ_H */
diff --git a/xen/include/asm-i386/mc146818rtc.h b/xen/include/asm-i386/mc146818rtc.h
new file mode 100644
index 0000000000..03a4efa9e8
--- /dev/null
+++ b/xen/include/asm-i386/mc146818rtc.h
@@ -0,0 +1,113 @@
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+#include <asm/io.h>
+#include <xeno/spinlock.h>
+
+extern spinlock_t rtc_lock; /* serialize CMOS RAM access */
+
+/**********************************************************************
+ * register summary
+ **********************************************************************/
+#define RTC_SECONDS 0
+#define RTC_SECONDS_ALARM 1
+#define RTC_MINUTES 2
+#define RTC_MINUTES_ALARM 3
+#define RTC_HOURS 4
+#define RTC_HOURS_ALARM 5
+/* RTC_*_alarm is always true if 2 MSBs are set */
+# define RTC_ALARM_DONT_CARE 0xC0
+
+#define RTC_DAY_OF_WEEK 6
+#define RTC_DAY_OF_MONTH 7
+#define RTC_MONTH 8
+#define RTC_YEAR 9
+
+/* control registers - Motorola names */
+#define RTC_REG_A 10
+#define RTC_REG_B 11
+#define RTC_REG_C 12
+#define RTC_REG_D 13
+
+/**********************************************************************
+ * register details
+ **********************************************************************/
+#define RTC_FREQ_SELECT RTC_REG_A
+
+/* update-in-progress - set to "1" 244 microsecs before RTC goes off the bus,
+ * reset after update (may take 1.984ms @ 32768Hz RefClock) is complete,
+ * giving a maximum high interval of 2.228 ms.
+ */
+# define RTC_UIP 0x80
+# define RTC_DIV_CTL 0x70
+ /* divider control: refclock values 4.194 / 1.049 MHz / 32.768 kHz */
+# define RTC_REF_CLCK_4MHZ 0x00
+# define RTC_REF_CLCK_1MHZ 0x10
+# define RTC_REF_CLCK_32KHZ 0x20
+ /* 2 values for divider stage reset, others for "testing purposes only" */
+# define RTC_DIV_RESET1 0x60
+# define RTC_DIV_RESET2 0x70
+ /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */
+# define RTC_RATE_SELECT 0x0F
+
+/**********************************************************************/
+#define RTC_CONTROL RTC_REG_B
+# define RTC_SET 0x80 /* disable updates for clock setting */
+# define RTC_PIE 0x40 /* periodic interrupt enable */
+# define RTC_AIE 0x20 /* alarm interrupt enable */
+# define RTC_UIE 0x10 /* update-finished interrupt enable */
+# define RTC_SQWE 0x08 /* enable square-wave output */
+# define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
+# define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
+# define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
+
+/**********************************************************************/
+#define RTC_INTR_FLAGS RTC_REG_C
+/* caution - cleared by read */
+# define RTC_IRQF 0x80 /* any of the following 3 is active */
+# define RTC_PF 0x40
+# define RTC_AF 0x20
+# define RTC_UF 0x10
+
+/**********************************************************************/
+#define RTC_VALID RTC_REG_D
+# define RTC_VRT 0x80 /* valid RAM and time */
+/**********************************************************************/
+
+/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
+ * determines if the following two #defines are needed
+ */
+#ifndef BCD_TO_BIN
+#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
+#endif
+
+#ifndef BIN_TO_BCD
+#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
+#endif
+
+
+#ifndef RTC_PORT
+#define RTC_PORT(x) (0x70 + (x))
+#define RTC_ALWAYS_BCD 1 /* assume BCD regardless of RTC_DM_BINARY */
+#endif
+
+/*
+ * All machines supported so far access the RTC index register via
+ * an ISA port access, but the way the data register is accessed differs ...
+ */
+#define CMOS_READ(addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+outb_p((val),RTC_PORT(1)); \
+})
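+
+/*
+ * Usage sketch: read the seconds register under rtc_lock, converting
+ * from BCD unless the clock is in binary mode. (A careful caller also
+ * waits for RTC_UIP to clear first; that is omitted here.)
+ */
+static inline unsigned int example_read_seconds(void)
+{
+    unsigned int sec;
+
+    spin_lock(&rtc_lock);
+    sec = CMOS_READ(RTC_SECONDS);
+    if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+        BCD_TO_BIN(sec);
+    spin_unlock(&rtc_lock);
+    return sec;
+}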
+
+#define RTC_IRQ 8
+
+#endif /* _ASM_MC146818RTC_H */
diff --git a/xen/include/asm-i386/mpspec.h b/xen/include/asm-i386/mpspec.h
new file mode 100644
index 0000000000..2829cb54a3
--- /dev/null
+++ b/xen/include/asm-i386/mpspec.h
@@ -0,0 +1,224 @@
+#ifndef __ASM_MPSPEC_H
+#define __ASM_MPSPEC_H
+
+
+/*
+ * Structure definitions for SMP machines following the
+ * Intel Multiprocessing Specification 1.1 and 1.4.
+ */
+
+/*
+ * This tag identifies where the SMP configuration
+ * information is.
+ */
+
+#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
+
+/*
+ * A maximum of 16 APICs is possible with the current APIC ID architecture.
+ * xAPICs can have up to 256. SAPICs have 16 ID bits.
+ */
+#ifdef CONFIG_X86_CLUSTERED_APIC
+#define MAX_APICS 256
+#else
+#define MAX_APICS 16
+#endif
+
+#define MAX_MPC_ENTRY 1024
+
+struct intel_mp_floating
+{
+ char mpf_signature[4]; /* "_MP_" */
+ unsigned long mpf_physptr; /* Configuration table address */
+ unsigned char mpf_length; /* Our length (paragraphs) */
+ unsigned char mpf_specification;/* Specification version */
+ unsigned char mpf_checksum; /* Checksum (makes sum 0) */
+ unsigned char mpf_feature1; /* Standard or configuration ? */
+ unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
+ unsigned char mpf_feature3; /* Unused (0) */
+ unsigned char mpf_feature4; /* Unused (0) */
+ unsigned char mpf_feature5; /* Unused (0) */
+};
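+
+/*
+ * Usage sketch: test a 16-byte-aligned candidate address for the
+ * floating pointer signature. A real scan (find_smp_config) would also
+ * verify mpf_checksum over mpf_length * 16 bytes; only the signature
+ * test is shown here.
+ */
+static inline int example_is_mpf(struct intel_mp_floating *mpf)
+{
+    return *(unsigned int *)mpf->mpf_signature == SMP_MAGIC_IDENT;
+}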
+
+struct mp_config_table
+{
+ char mpc_signature[4];
+#define MPC_SIGNATURE "PCMP"
+ unsigned short mpc_length; /* Size of table */
+ char mpc_spec; /* 0x01 */
+ char mpc_checksum;
+ char mpc_oem[8];
+ char mpc_productid[12];
+ unsigned long mpc_oemptr; /* 0 if not present */
+ unsigned short mpc_oemsize; /* 0 if not present */
+ unsigned short mpc_oemcount;
+ unsigned long mpc_lapic; /* APIC address */
+ unsigned long reserved;
+};
+
+/* Followed by entries */
+
+#define MP_PROCESSOR 0
+#define MP_BUS 1
+#define MP_IOAPIC 2
+#define MP_INTSRC 3
+#define MP_LINTSRC 4
+#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */
+
+struct mpc_config_processor
+{
+ unsigned char mpc_type;
+ unsigned char mpc_apicid; /* Local APIC number */
+ unsigned char mpc_apicver; /* Local APIC version */
+ unsigned char mpc_cpuflag;
+#define CPU_ENABLED 1 /* Processor is available */
+#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
+ unsigned long mpc_cpufeature;
+#define CPU_STEPPING_MASK 0x0F
+#define CPU_MODEL_MASK 0xF0
+#define CPU_FAMILY_MASK 0xF00
+ unsigned long mpc_featureflag; /* CPUID feature value */
+ unsigned long mpc_reserved[2];
+};
+
+struct mpc_config_bus
+{
+ unsigned char mpc_type;
+ unsigned char mpc_busid;
+ unsigned char mpc_bustype[6] __attribute((packed));
+};
+
+/* List of Bus Type string values, Intel MP Spec. */
+#define BUSTYPE_EISA "EISA"
+#define BUSTYPE_ISA "ISA"
+#define BUSTYPE_INTERN "INTERN" /* Internal BUS */
+#define BUSTYPE_MCA "MCA"
+#define BUSTYPE_VL "VL" /* Local bus */
+#define BUSTYPE_PCI "PCI"
+#define BUSTYPE_PCMCIA "PCMCIA"
+#define BUSTYPE_CBUS "CBUS"
+#define BUSTYPE_CBUSII "CBUSII"
+#define BUSTYPE_FUTURE "FUTURE"
+#define BUSTYPE_MBI "MBI"
+#define BUSTYPE_MBII "MBII"
+#define BUSTYPE_MPI "MPI"
+#define BUSTYPE_MPSA "MPSA"
+#define BUSTYPE_NUBUS "NUBUS"
+#define BUSTYPE_TC "TC"
+#define BUSTYPE_VME "VME"
+#define BUSTYPE_XPRESS "XPRESS"
+
+struct mpc_config_ioapic
+{
+ unsigned char mpc_type;
+ unsigned char mpc_apicid;
+ unsigned char mpc_apicver;
+ unsigned char mpc_flags;
+#define MPC_APIC_USABLE 0x01
+ unsigned long mpc_apicaddr;
+};
+
+struct mpc_config_intsrc
+{
+ unsigned char mpc_type;
+ unsigned char mpc_irqtype;
+ unsigned short mpc_irqflag;
+ unsigned char mpc_srcbus;
+ unsigned char mpc_srcbusirq;
+ unsigned char mpc_dstapic;
+ unsigned char mpc_dstirq;
+};
+
+enum mp_irq_source_types {
+ mp_INT = 0,
+ mp_NMI = 1,
+ mp_SMI = 2,
+ mp_ExtINT = 3
+};
+
+#define MP_IRQDIR_DEFAULT 0
+#define MP_IRQDIR_HIGH 1
+#define MP_IRQDIR_LOW 3
+
+
+struct mpc_config_lintsrc
+{
+ unsigned char mpc_type;
+ unsigned char mpc_irqtype;
+ unsigned short mpc_irqflag;
+ unsigned char mpc_srcbusid;
+ unsigned char mpc_srcbusirq;
+ unsigned char mpc_destapic;
+#define MP_APIC_ALL 0xFF
+ unsigned char mpc_destapiclint;
+};
+
+struct mp_config_oemtable
+{
+ char oem_signature[4];
+#define MPC_OEM_SIGNATURE "_OEM"
+ unsigned short oem_length; /* Size of table */
+ char oem_rev; /* 0x01 */
+ char oem_checksum;
+ char mpc_oem[8];
+};
+
+struct mpc_config_translation
+{
+ unsigned char mpc_type;
+ unsigned char trans_len;
+ unsigned char trans_type;
+ unsigned char trans_quad;
+ unsigned char trans_global;
+ unsigned char trans_local;
+ unsigned short trans_reserved;
+};
+
+/*
+ * Default configurations
+ *
+ * 1 2 CPU ISA 82489DX
+ * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
+ * 3 2 CPU EISA 82489DX
+ * 4 2 CPU MCA 82489DX
+ * 5 2 CPU ISA+PCI
+ * 6 2 CPU EISA+PCI
+ * 7 2 CPU MCA+PCI
+ */
+
+#ifdef CONFIG_MULTIQUAD
+#define MAX_IRQ_SOURCES 512
+#else /* !CONFIG_MULTIQUAD */
+#define MAX_IRQ_SOURCES 256
+#endif /* CONFIG_MULTIQUAD */
+
+#define MAX_MP_BUSSES 32
+enum mp_bustype {
+ MP_BUS_ISA = 1,
+ MP_BUS_EISA,
+ MP_BUS_PCI,
+ MP_BUS_MCA
+};
+extern int *mp_bus_id_to_type;
+extern int *mp_bus_id_to_node;
+extern int *mp_bus_id_to_local;
+extern int *mp_bus_id_to_pci_bus;
+extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+
+extern unsigned int boot_cpu_physical_apicid;
+extern unsigned long phys_cpu_present_map;
+extern int smp_found_config;
+extern void find_smp_config (void);
+extern void get_smp_config (void);
+extern int nr_ioapics;
+extern int apic_version [MAX_APICS];
+extern int mp_irq_entries;
+extern struct mpc_config_intsrc *mp_irqs;
+extern int mpc_default_type;
+extern int mp_current_pci_id;
+extern unsigned long mp_lapic_addr;
+extern int pic_mode;
+extern int using_apic_timer;
+
+#endif
+
diff --git a/xen/include/asm-i386/msr.h b/xen/include/asm-i386/msr.h
new file mode 100644
index 0000000000..11bcb7f29e
--- /dev/null
+++ b/xen/include/asm-i386/msr.h
@@ -0,0 +1,104 @@
+#ifndef __ASM_MSR_H
+#define __ASM_MSR_H
+
+/*
+ * Access to machine-specific registers (available on 586 and better only)
+ * Note: the rd* operations modify the parameters directly (without using
+ * pointer indirection), this allows gcc to optimize better
+ */
+
+#define rdmsr(msr,val1,val2) \
+ __asm__ __volatile__("rdmsr" \
+ : "=a" (val1), "=d" (val2) \
+ : "c" (msr))
+
+#define wrmsr(msr,val1,val2) \
+ __asm__ __volatile__("wrmsr" \
+ : /* no outputs */ \
+ : "c" (msr), "a" (val1), "d" (val2))
+
+#define rdtsc(low,high) \
+ __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+#define rdtscl(low) \
+ __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
+
+#define rdtscll(val) \
+ __asm__ __volatile__("rdtsc" : "=A" (val))
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+#define rdpmc(counter,low,high) \
+ __asm__ __volatile__("rdpmc" \
+ : "=a" (low), "=d" (high) \
+ : "c" (counter))
+
+/* symbolic names for some interesting MSRs */
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR 0
+#define MSR_IA32_P5_MC_TYPE 1
+#define MSR_IA32_PLATFORM_ID 0x17
+#define MSR_IA32_EBL_CR_POWERON 0x2a
+
+#define MSR_IA32_APICBASE 0x1b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+
+#define MSR_IA32_UCODE_WRITE 0x79
+#define MSR_IA32_UCODE_REV 0x8b
+
+#define MSR_IA32_PERFCTR0 0xc1
+#define MSR_IA32_PERFCTR1 0xc2
+
+#define MSR_IA32_BBL_CR_CTL 0x119
+
+#define MSR_IA32_MCG_CAP 0x179
+#define MSR_IA32_MCG_STATUS 0x17a
+#define MSR_IA32_MCG_CTL 0x17b
+
+#define MSR_IA32_EVNTSEL0 0x186
+#define MSR_IA32_EVNTSEL1 0x187
+
+#define MSR_IA32_DEBUGCTLMSR 0x1d9
+#define MSR_IA32_LASTBRANCHFROMIP 0x1db
+#define MSR_IA32_LASTBRANCHTOIP 0x1dc
+#define MSR_IA32_LASTINTFROMIP 0x1dd
+#define MSR_IA32_LASTINTTOIP 0x1de
+
+#define MSR_IA32_MC0_CTL 0x400
+#define MSR_IA32_MC0_STATUS 0x401
+#define MSR_IA32_MC0_ADDR 0x402
+#define MSR_IA32_MC0_MISC 0x403
+
+/* AMD Defined MSRs */
+#define MSR_K6_EFER 0xC0000080
+#define MSR_K6_STAR 0xC0000081
+#define MSR_K6_WHCR 0xC0000082
+#define MSR_K6_UWCCR 0xC0000085
+#define MSR_K6_PSOR 0xC0000087
+#define MSR_K6_PFIR 0xC0000088
+
+#define MSR_K7_EVNTSEL0 0xC0010000
+#define MSR_K7_PERFCTR0 0xC0010004
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1 0x107
+#define MSR_IDT_FCR2 0x108
+#define MSR_IDT_FCR3 0x109
+#define MSR_IDT_FCR4 0x10a
+
+#define MSR_IDT_MCR0 0x110
+#define MSR_IDT_MCR1 0x111
+#define MSR_IDT_MCR2 0x112
+#define MSR_IDT_MCR3 0x113
+#define MSR_IDT_MCR4 0x114
+#define MSR_IDT_MCR5 0x115
+#define MSR_IDT_MCR6 0x116
+#define MSR_IDT_MCR7 0x117
+#define MSR_IDT_MCR_CTRL 0x120
+
+/* VIA Cyrix defined MSRs */
+#define MSR_VIA_FCR 0x1107
+
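+/*
+ * Usage sketch: read the local APIC base MSR and set its global enable
+ * bit. The rdmsr/wrmsr macros operate on the two 32-bit halves.
+ */
+static inline void example_enable_lapic(void)
+{
+    unsigned int lo, hi;
+
+    rdmsr(MSR_IA32_APICBASE, lo, hi);
+    wrmsr(MSR_IA32_APICBASE, lo | MSR_IA32_APICBASE_ENABLE, hi);
+}
+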
+#endif /* __ASM_MSR_H */
diff --git a/xen/include/asm-i386/page.h b/xen/include/asm-i386/page.h
new file mode 100644
index 0000000000..63b5c73afd
--- /dev/null
+++ b/xen/include/asm-i386/page.h
@@ -0,0 +1,175 @@
+#ifndef _I386_PAGE_H
+#define _I386_PAGE_H
+
+
+#ifndef __ASSEMBLY__
+#define BUG() do { \
+ printk("BUG at %s:%d\n", __FILE__, __LINE__); \
+ __asm__ __volatile__("ud2"); \
+} while (0)
+#endif /* __ASSEMBLY__ */
+
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+
+#define ENTRIES_PER_L1_PAGETABLE 1024
+#define ENTRIES_PER_L2_PAGETABLE 1024
+
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
+#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+#include <xeno/config.h>
+typedef struct { unsigned long l1_lo; } l1_pgentry_t;
+typedef struct { unsigned long l2_lo; } l2_pgentry_t;
+typedef l1_pgentry_t *l1_pagetable_t;
+typedef l2_pgentry_t *l2_pagetable_t;
+typedef struct { unsigned long pt_lo; } pagetable_t;
+#endif /* !__ASSEMBLY__ */
+
+/* Strip type from a table entry. */
+#define l1_pgentry_val(_x) ((_x).l1_lo)
+#define l2_pgentry_val(_x) ((_x).l2_lo)
+#define pagetable_val(_x) ((_x).pt_lo)
+
+#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL))
+#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL))
+
+/* Add type to a table entry. */
+#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } )
+#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } )
+#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
+
+/* Turn a typed table entry into a page index. */
+#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT)
+#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
+
+/* Turn a typed table entry into a physical address. */
+#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK)
+#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK)
+
+/* Dereference a typed level-2 entry to yield a typed level-1 table. */
+#define l2_pgentry_to_l1(_x) \
+ ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK))
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
+#define l2_table_offset(_a) \
+ ((_a) >> L2_PAGETABLE_SHIFT)
+
+/* Hypervisor table entries use zero to signify 'empty'. */
+#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x))
+#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x))
+
+#define __PAGE_OFFSET (0xFC400000)
+#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT))
+#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
+#define VALID_PAGE(page) (((page) - frame_table) < max_mapnr)
+
+/* High table entries are reserved by the hypervisor. */
+#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
+ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
+#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \
+ (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/flushtlb.h>
+
+extern l2_pgentry_t idle0_pg_table[ENTRIES_PER_L2_PAGETABLE];
+extern l2_pgentry_t *idle_pg_table[NR_CPUS];
+extern void paging_init(void);
+
+#define __flush_tlb() __flush_tlb_counted()
+
+/* Flush global pages as well. */
+
+#define __pge_off() \
+ do { \
+ __asm__ __volatile__( \
+ "movl %0, %%cr4; # turn off PGE " \
+ :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \
+ } while (0)
+
+#define __pge_on() \
+ do { \
+ __asm__ __volatile__( \
+ "movl %0, %%cr4; # turn off PGE " \
+ :: "r" (mmu_cr4_features)); \
+ } while (0)
+
+
+#define __flush_tlb_all() \
+ do { \
+ __pge_off(); \
+ __flush_tlb_counted(); \
+ __pge_on(); \
+ } while (0)
+
+#define __flush_tlb_one(__addr) \
+__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
+
+#endif /* !__ASSEMBLY__ */
+
+
+#define _PAGE_PRESENT 0x001
+#define _PAGE_RW 0x002
+#define _PAGE_USER 0x004
+#define _PAGE_PWT 0x008
+#define _PAGE_PCD 0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY 0x040
+#define _PAGE_PAT 0x080
+#define _PAGE_PSE 0x080
+#define _PAGE_GLOBAL 0x100
+
+#define __PAGE_HYPERVISOR \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_NOCACHE \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_RO \
+ (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL)
+
+#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR)
+#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO)
+#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE)
+
+#define mk_l2_writeable(_p) \
+ (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l2_readonly(_p) \
+ (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW))
+#define mk_l1_writeable(_p) \
+ (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l1_readonly(_p) \
+ (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW))
+
+
+#ifndef __ASSEMBLY__
+static __inline__ int get_order(unsigned long size)
+{
+ int order;
+
+ size = (size-1) >> (PAGE_SHIFT-1);
+ order = -1;
+ do {
+ size >>= 1;
+ order++;
+ } while (size);
+ return order;
+}
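+
+/*
+ * Illustrative examples (not in the original source), assuming 4kB pages:
+ * get_order(1) == 0, get_order(PAGE_SIZE) == 0, get_order(PAGE_SIZE+1) == 1,
+ * get_order(8*PAGE_SIZE) == 3; i.e. the smallest order such that 2^order
+ * pages cover the requested size.
+ */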
+#endif
+
+#endif /* _I386_PAGE_H */
diff --git a/xen/include/asm-i386/param.h b/xen/include/asm-i386/param.h
new file mode 100644
index 0000000000..1b10bf49fe
--- /dev/null
+++ b/xen/include/asm-i386/param.h
@@ -0,0 +1,24 @@
+#ifndef _ASMi386_PARAM_H
+#define _ASMi386_PARAM_H
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+#define EXEC_PAGESIZE 4096
+
+#ifndef NGROUPS
+#define NGROUPS 32
+#endif
+
+#ifndef NOGROUP
+#define NOGROUP (-1)
+#endif
+
+#define MAXHOSTNAMELEN 64 /* max length of hostname */
+
+#ifdef __KERNEL__
+# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */
+#endif
+
+#endif
diff --git a/xen/include/asm-i386/pci.h b/xen/include/asm-i386/pci.h
new file mode 100644
index 0000000000..1ffade8914
--- /dev/null
+++ b/xen/include/asm-i386/pci.h
@@ -0,0 +1,286 @@
+#ifndef __i386_PCI_H
+#define __i386_PCI_H
+
+#include <linux/config.h>
+
+#ifdef __KERNEL__
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+ already-configured bus numbers - to be used for buggy BIOSes
+ or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses() 0
+#endif
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO 0x1000
+#define PCIBIOS_MIN_MEM (pci_mem_start)
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* Dynamic DMA mapping stuff.
+ * i386 has everything mapped statically.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+/*#include <linux/string.h>*/
+#include <asm/io.h>
+
+struct pci_dev;
+
+/* The PCI address space does equal the physical memory
+ * address space. The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS (1)
+
+/* Allocate and map kernel buffer using consistent mode DMA for a device.
+ * hwdev should be valid struct pci_dev pointer for PCI devices,
+ * NULL for PCI-like buses (ISA, EISA).
+ * Returns non-NULL cpu-view pointer to the buffer if successful and
+ * sets *dma_handle to the PCI-side DMA address as well; otherwise
+ * *dma_handle is undefined.
+ */
+extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+ dma_addr_t *dma_handle);
+
+/* Free and unmap a consistent DMA buffer.
+ * cpu_addr is what was returned from pci_alloc_consistent,
+ * size must be the same as what was passed into pci_alloc_consistent,
+ * and likewise dma_handle must be the same as what *dma_handle was set to.
+ *
+ * References to the memory and mappings associated with cpu_addr/dma_addr
+ * past this call are illegal.
+ */
+extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle);
+
+/* Map a single buffer of the indicated size for DMA in streaming mode.
+ * The 32-bit bus address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory
+ * until either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+ size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ flush_write_buffers();
+ return virt_to_bus(ptr);
+}
+
+/* Unmap a single streaming mode DMA translation. The dma_addr and size
+ * must match what was provided for in a previous pci_map_single call. All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
+ size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ /* Nothing to do */
+}
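+
+#if 0
+/* Illustrative usage sketch (not in the original source); 'dev', 'buf' and
+ * 'len' are hypothetical. A streaming mapping pairs map and unmap around
+ * the device transfer: */
+static void example_streaming_dma(struct pci_dev *dev, void *buf, size_t len)
+{
+    dma_addr_t bus = pci_map_single(dev, buf, len, PCI_DMA_TODEVICE);
+    /* ... hand 'bus' to the device and wait for the transfer ... */
+    pci_unmap_single(dev, bus, len, PCI_DMA_TODEVICE);
+}
+#endif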
+
+/*
+ * pci_{map,unmap}_page map a kernel page to/from a dma_addr_t. Identical
+ * to pci_{map,unmap}_single, but take a struct pfn_info instead of a
+ * virtual address.
+ */
+static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct pfn_info *page,
+ unsigned long offset, size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+
+ return (dma_addr_t)(page - frame_table) * PAGE_SIZE + offset;
+}
+
+static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
+ size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ /* Nothing to do */
+}
+
+/* pci_unmap_{page,single} is a nop so... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME) (0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME) (0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
+
+/* Map a set of buffers described by scatterlist in streaming
+ * mode for DMA. This is the scatter-gather version of the
+ * above pci_map_single interface. Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length. They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+ int nents, int direction)
+{
+ int i;
+
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+
+ /*
+ * temporary 2.4 hack
+ */
+ for (i = 0; i < nents; i++ ) {
+ if (sg[i].address && sg[i].page)
+ out_of_line_bug();
+
+ /* not worth checking since NULL is ok says SMH */
+#if 0
+ else if (!sg[i].address && !sg[i].page)
+ out_of_line_bug();
+#endif
+
+ if (sg[i].address)
+ sg[i].dma_address = virt_to_bus(sg[i].address);
+ else
+ sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
+ }
+
+ flush_write_buffers();
+ return nents;
+}
+
+/* Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+ int nents, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ /* Nothing to do */
+}
+
+/* Make physical memory consistent for a single
+ * streaming mode DMA translation after a transfer.
+ *
+ * If you perform a pci_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to teardown the PCI dma
+ * mapping, you must call this function before doing so. At the
+ * next point you give the PCI dma address back to the card, the
+ * device again owns the buffer.
+ */
+static inline void pci_dma_sync_single(struct pci_dev *hwdev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ flush_write_buffers();
+}
+
+/* Make physical memory consistent for a set of streaming
+ * mode DMA translations after a transfer.
+ *
+ * The same as pci_dma_sync_single but for a scatter-gather list,
+ * same rules and usage.
+ */
+static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ flush_write_buffers();
+}
+
+/* Return whether the given PCI device DMA address mask can
+ * be supported properly. For example, if your device can
+ * only drive the low 24-bits during PCI bus mastering, then
+ * you would pass 0x00ffffff as the mask to this function.
+ */
+static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
+{
+ /*
+ * we fall back to GFP_DMA when the mask isn't all 1s,
+ * so we can't guarantee allocations that must be
+ * within a tighter range than GFP_DMA..
+ */
+ if(mask < 0x00ffffff)
+ return 0;
+
+ return 1;
+}
+
+/* This is always fine. */
+#define pci_dac_dma_supported(pci_dev, mask) (1)
+
+static __inline__ dma64_addr_t
+pci_dac_page_to_dma(struct pci_dev *pdev, struct pfn_info *page, unsigned long offset, int direction)
+{
+ return ((dma64_addr_t) page_to_bus(page) +
+ (dma64_addr_t) offset);
+}
+
+static __inline__ struct pfn_info *
+pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+ unsigned long poff = (dma_addr >> PAGE_SHIFT);
+
+ return frame_table + poff;
+}
+
+static __inline__ unsigned long
+pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+ return (dma_addr & ~PAGE_MASK);
+}
+
+static __inline__ void
+pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+ flush_write_buffers();
+}
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg) ((sg)->dma_address)
+#define sg_dma_len(sg) ((sg)->length)
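+
+#if 0
+/* Illustrative usage sketch (not in the original source); 'dev', 'sg',
+ * 'nents' and program_device() are hypothetical. Only the count returned
+ * by pci_map_sg() may be walked: */
+extern void program_device(dma_addr_t addr, unsigned int len);
+static void example_sg_dma(struct pci_dev *dev, struct scatterlist *sg, int nents)
+{
+    int i, n = pci_map_sg(dev, sg, nents, PCI_DMA_FROMDEVICE);
+    for (i = 0; i < n; i++)
+        program_device(sg_dma_address(&sg[i]), sg_dma_len(&sg[i]));
+    /* ... wait for the device, then tear the mappings down ... */
+    pci_unmap_sg(dev, sg, nents, PCI_DMA_FROMDEVICE);
+}
+#endif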
+
+/* Return the index of the PCI controller for device. */
+static inline int pci_controller_num(struct pci_dev *dev)
+{
+ return 0;
+}
+
+#if 0 /* XXX Not in land of Xen XXX */
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine);
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* __i386_PCI_H */
diff --git a/xen/include/asm-i386/pgalloc.h b/xen/include/asm-i386/pgalloc.h
new file mode 100644
index 0000000000..fcba5e1585
--- /dev/null
+++ b/xen/include/asm-i386/pgalloc.h
@@ -0,0 +1,117 @@
+#ifndef _I386_PGALLOC_H
+#define _I386_PGALLOC_H
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist (current_cpu_data.pmd_quick)
+#define pte_quicklist (current_cpu_data.pte_quick)
+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
+
+
+/*
+ * Allocate and free page tables.
+ */
+
+
+#define pte_free(pte) pte_free_fast(pte)
+#define pgd_alloc(mm) get_pgd_fast()
+#define pgd_free(pgd) free_pgd_fast(pgd)
+
+/*
+ * allocating and freeing a pmd is trivial: the 1-entry pmd is
+ * inside the pgd, so has no extra memory associated with it.
+ * (In the PAE case we free the pmds as part of the pgd.)
+ */
+
+#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); })
+#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
+#define pmd_free_slow(x) do { } while (0)
+#define pmd_free_fast(x) do { } while (0)
+#define pmd_free(x) do { } while (0)
+#define pgd_populate(mm, pmd, pte) BUG()
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(mm, start, end) flushes a range of pages
+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * ..but the i386 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (mm == current->active_mm)
+ __flush_tlb();
+}
+
+#if 0
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ if (vma->vm_mm == current->active_mm)
+ __flush_tlb_one(addr);
+}
+#endif
+
+static inline void flush_tlb_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ if (mm == current->active_mm)
+ __flush_tlb();
+}
+
+#else
+
+#include <xeno/smp.h>
+
+#define local_flush_tlb() \
+ __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+/*extern void flush_tlb_page(struct vm_area_struct *, unsigned long);*/
+
+#define flush_tlb() flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end)
+{
+ flush_tlb_mm(mm);
+}
+
+#define TLBSTATE_OK 1
+#define TLBSTATE_LAZY 2
+
+struct tlb_state
+{
+ struct mm_struct *active_mm;
+ int state;
+};
+extern struct tlb_state cpu_tlbstate[NR_CPUS];
+
+
+#endif
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ /* i386 does not keep any page table caches in TLB */
+}
+
+#endif /* _I386_PGALLOC_H */
diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h
new file mode 100644
index 0000000000..36a50b2976
--- /dev/null
+++ b/xen/include/asm-i386/processor.h
@@ -0,0 +1,501 @@
+/*
+ * include/asm-i386/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_I386_PROCESSOR_H
+#define __ASM_I386_PROCESSOR_H
+
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/cpufeature.h>
+#include <asm/desc.h>
+#include <xeno/config.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
+
+/*
+ * CPU type and hardware bug flags. Kept separately for each CPU.
+ * Members of this structure are referenced in head.S, so think twice
+ * before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+ int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+ __u32 x86_capability[NCAPINTS];
+ char x86_vendor_id[16];
+ unsigned long *pgd_quick;
+ unsigned long *pmd_quick;
+ unsigned long *pte_quick;
+ unsigned long pgtable_cache_sz;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_UNKNOWN 0xff
+
+/*
+ * capabilities of CPUs
+ */
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct tss_struct init_tss[NR_CPUS];
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+#define cpu_has_pge (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability))
+#define cpu_has_pse (test_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability))
+#define cpu_has_pae (test_bit(X86_FEATURE_PAE, boot_cpu_data.x86_capability))
+#define cpu_has_tsc (test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability))
+#define cpu_has_de (test_bit(X86_FEATURE_DE, boot_cpu_data.x86_capability))
+#define cpu_has_vme (test_bit(X86_FEATURE_VME, boot_cpu_data.x86_capability))
+#define cpu_has_fxsr (test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability))
+#define cpu_has_xmm (test_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability))
+#define cpu_has_fpu (test_bit(X86_FEATURE_FPU, boot_cpu_data.x86_capability))
+#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability))
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void dodgy_tsc(void);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
+ */
+static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (op));
+}
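+
+#if 0
+/* Illustrative sketch (not in the original source): leaf 0 returns the
+ * highest supported leaf in EAX and the 12-byte vendor string in
+ * EBX, EDX, ECX (in that order): */
+static void example_read_vendor(int vend[4])
+{
+    int eax;
+    cpuid(0, &eax, &vend[0], &vend[2], &vend[1]);
+    vend[3] = 0;    /* (char *)vend is now e.g. "GenuineIntel" */
+}
+#endif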
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax;
+
+ __asm__("cpuid"
+ : "=a" (eax)
+ : "0" (op)
+ : "bx", "cx", "dx");
+ return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+ unsigned int eax, ebx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=b" (ebx)
+ : "0" (op)
+ : "cx", "dx" );
+ return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ecx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=c" (ecx)
+ : "0" (op)
+ : "bx", "dx" );
+ return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, edx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=d" (edx)
+ : "0" (op)
+ : "bx", "cx");
+ return edx;
+}
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+#define X86_CR4_MCE 0x0040 /* Machine check enable */
+#define X86_CR4_PGE 0x0080 /* enable global pages */
+#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
+
+/*
+ * Save the CR4 feature set we're using (i.e.
+ * Pentium 4MB enable and PPro global page
+ * enable), so that any CPUs that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4 (unsigned long mask)
+{
+ mmu_cr4_features |= mask;
+ __asm__("movl %%cr4,%%eax\n\t"
+ "orl %0,%%eax\n\t"
+ "movl %%eax,%%cr4\n"
+ : : "irg" (mask)
+ :"ax");
+}
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+ mmu_cr4_features &= ~mask;
+ __asm__("movl %%cr4,%%eax\n\t"
+ "andl %0,%%eax\n\t"
+ "movl %%eax,%%cr4\n"
+ : : "irg" (~mask)
+ :"ax");
+}
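+
+#if 0
+/* Illustrative sketch (not in the original source): enabling global pages
+ * on the boot CPU; mmu_cr4_features stays in sync so that CPUs booting
+ * later can pick up the same flags: */
+if (cpu_has_pge)
+    set_in_cr4(X86_CR4_PGE);
+#endif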
+
+/*
+ * Cyrix CPU configuration register indexes
+ */
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ * Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+ outb((reg), 0x22); \
+ outb((data), 0x23); \
+} while (0)
+
+#define EISA_bus (0)
+#define MCA_bus (0)
+
+/* from system description table in BIOS. Mostly for MCA use, but
+ * others may find it useful. */
+extern unsigned int machine_id;
+extern unsigned int machine_submodel_id;
+extern unsigned int BIOS_revision;
+extern unsigned int mca_pentium_flag;
+
+/*
+ * User space process size: 3GB (default).
+ */
+#define TASK_SIZE (PAGE_OFFSET)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE (TASK_SIZE / 3)
+
+/*
+ * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
+ */
+#define IO_BITMAP_SIZE 32
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fsave_struct {
+ long cwd;
+ long swd;
+ long twd;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ long status; /* software status information */
+};
+
+struct i387_fxsave_struct {
+ unsigned short cwd;
+ unsigned short swd;
+ unsigned short twd;
+ unsigned short fop;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long mxcsr;
+ long reserved;
+ long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+ long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
+ long padding[56];
+} __attribute__ ((aligned (16)));
+
+struct i387_soft_struct {
+ long cwd;
+ long swd;
+ long twd;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ unsigned char ftop, changed, lookahead, no_update, rm, alimit;
+ struct info *info;
+ unsigned long entry_eip;
+};
+
+union i387_union {
+ struct i387_fsave_struct fsave;
+ struct i387_fxsave_struct fxsave;
+ struct i387_soft_struct soft;
+};
+
+typedef struct {
+ unsigned long seg;
+} mm_segment_t;
+
+struct tss_struct {
+ unsigned short back_link,__blh;
+ unsigned long esp0;
+ unsigned short ss0,__ss0h;
+ unsigned long esp1;
+ unsigned short ss1,__ss1h;
+ unsigned long esp2;
+ unsigned short ss2,__ss2h;
+ unsigned long __cr3;
+ unsigned long eip;
+ unsigned long eflags;
+ unsigned long eax,ecx,edx,ebx;
+ unsigned long esp;
+ unsigned long ebp;
+ unsigned long esi;
+ unsigned long edi;
+ unsigned short es, __esh;
+ unsigned short cs, __csh;
+ unsigned short ss, __ssh;
+ unsigned short ds, __dsh;
+ unsigned short fs, __fsh;
+ unsigned short gs, __gsh;
+ unsigned short ldt, __ldth;
+ unsigned short trace, bitmap;
+ unsigned long io_bitmap[IO_BITMAP_SIZE+1];
+ /*
+ * pads the TSS to be cacheline-aligned (size is 0x100)
+ */
+ unsigned long __cacheline_filler[5];
+};
+
+struct thread_struct {
+ unsigned long esp0; /* top of the stack */
+ unsigned long eip; /* in kernel space, saved on task switch */
+ unsigned long esp; /* "" */
+ unsigned long fs; /* "" (NB. DS/ES constant in mon, so no save) */
+ unsigned long gs; /* "" ("") */
+ unsigned long esp1, ss1;
+/* Hardware debugging registers */
+ unsigned long debugreg[8]; /* %%db0-7 debug registers */
+/* fault info */
+ unsigned long cr2, trap_no, error_code;
+/* floating point info */
+ union i387_union i387;
+/* Trap info. */
+ int fast_trap_idx;
+ struct desc_struct fast_trap_desc;
+ trap_info_t traps[256];
+};
+
+#define IDT_ENTRIES 256
+extern struct desc_struct idt_table[];
+extern struct desc_struct *idt_tables[];
+
+#define SET_DEFAULT_FAST_TRAP(_p) \
+ (_p)->fast_trap_idx = 0x20; \
+ (_p)->fast_trap_desc.a = 0; \
+ (_p)->fast_trap_desc.b = 0;
+
+#define CLEAR_FAST_TRAP(_p) \
+ (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ 0, 8))
+
+#define SET_FAST_TRAP(_p) \
+ (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ &((_p)->fast_trap_desc), 8))
+
+#define INIT_THREAD { \
+ sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \
+ 0, 0, 0, 0, 0, 0, \
+ { [0 ... 7] = 0 }, /* debugging registers */ \
+ 0, 0, 0, \
+ { { 0, }, }, /* 387 state */ \
+ 0x20, { 0, 0 }, /* DEFAULT_FAST_TRAP */ \
+ { {0} } /* io permissions */ \
+}
+
+#define INIT_TSS { \
+ 0,0, /* back_link, __blh */ \
+ sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \
+ __HYPERVISOR_DS, 0, /* ss0 */ \
+ 0,0,0,0,0,0, /* stack1, stack2 */ \
+ 0, /* cr3 */ \
+ 0,0, /* eip,eflags */ \
+ 0,0,0,0, /* eax,ecx,edx,ebx */ \
+ 0,0,0,0, /* esp,ebp,esi,edi */ \
+ 0,0,0,0,0,0, /* es,cs,ss */ \
+ 0,0,0,0,0,0, /* ds,fs,gs */ \
+ 0,0, /* ldt */ \
+ 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \
+ {~0, } /* ioperm */ \
+}
+
+#define start_thread(regs, new_eip, new_esp) do { \
+ __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
+ set_fs(USER_DS); \
+ regs->xds = __USER_DS; \
+ regs->xes = __USER_DS; \
+ regs->xss = __USER_DS; \
+ regs->xcs = __USER_CS; \
+ regs->eip = new_eip; \
+ regs->esp = new_esp; \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/* Copy and release all segment info associated with a VM */
+extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
+extern void release_segments(struct mm_struct * mm);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+static inline unsigned long thread_saved_pc(struct thread_struct *t)
+{
+ return ((unsigned long *)t->esp)[3];
+}
+
+unsigned long get_wchan(struct task_struct *p);
+#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+#define THREAD_SIZE (2*PAGE_SIZE)
+#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count)
+
+#define idle0_task (idle0_task_union.task)
+#define idle0_stack (idle0_task_union.stack)
+
+struct microcode {
+ unsigned int hdrver;
+ unsigned int rev;
+ unsigned int date;
+ unsigned int sig;
+ unsigned int cksum;
+ unsigned int ldrver;
+ unsigned int pf;
+ unsigned int reserved[5];
+ unsigned int bits[500];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE _IO('6',0)
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+ __asm__ __volatile__("rep;nop");
+}
+
+#define cpu_relax() rep_nop()
+
+/* Prefetch instructions for Pentium III and AMD Athlon */
+#ifdef CONFIG_MPENTIUMIII
+
+#define ARCH_HAS_PREFETCH
+extern inline void prefetch(const void *x)
+{
+ __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
+}
+
+#elif defined(CONFIG_X86_USE_3DNOW)
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+extern inline void prefetch(const void *x)
+{
+ __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
+}
+
+extern inline void prefetchw(const void *x)
+{
+ __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
+}
+#define spin_lock_prefetch(x) prefetchw(x)
+
+#endif
+
+#endif /* __ASM_I386_PROCESSOR_H */
diff --git a/xen/include/asm-i386/ptrace.h b/xen/include/asm-i386/ptrace.h
new file mode 100644
index 0000000000..509001cf57
--- /dev/null
+++ b/xen/include/asm-i386/ptrace.h
@@ -0,0 +1,86 @@
+#ifndef _I386_PTRACE_H
+#define _I386_PTRACE_H
+
+#define EBX 0
+#define ECX 1
+#define EDX 2
+#define ESI 3
+#define EDI 4
+#define EBP 5
+#define EAX 6
+#define DS 7
+#define ES 8
+#define FS 9
+#define GS 10
+#define ORIG_EAX 11
+#define EIP 12
+#define CS 13
+#define EFL 14
+#define UESP 15
+#define SS 16
+#define FRAME_SIZE 17
+
+/* this struct defines the way the registers are stored on the
+ stack during a system call. */
+
+struct pt_regs {
+ long ebx;
+ long ecx;
+ long edx;
+ long esi;
+ long edi;
+ long ebp;
+ long eax;
+ int xds;
+ int xes;
+ long orig_eax;
+ long eip;
+ int xcs;
+ long eflags;
+ long esp;
+ int xss;
+};
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS 14
+#define PTRACE_SETFPREGS 15
+#define PTRACE_GETFPXREGS 18
+#define PTRACE_SETFPXREGS 19
+
+#define PTRACE_SETOPTIONS 21
+
+/* options set using PTRACE_SETOPTIONS */
+#define PTRACE_O_TRACESYSGOOD 0x00000001
+
+enum EFLAGS {
+ EF_CF = 0x00000001,
+ EF_PF = 0x00000004,
+ EF_AF = 0x00000010,
+ EF_ZF = 0x00000040,
+ EF_SF = 0x00000080,
+ EF_TF = 0x00000100,
+ EF_IE = 0x00000200,
+ EF_DF = 0x00000400,
+ EF_OF = 0x00000800,
+ EF_IOPL = 0x00003000,
+ EF_IOPL_RING0 = 0x00000000,
+ EF_IOPL_RING1 = 0x00001000,
+ EF_IOPL_RING2 = 0x00002000,
+ EF_NT = 0x00004000, /* nested task */
+ EF_RF = 0x00010000, /* resume */
+ EF_VM = 0x00020000, /* virtual mode */
+ EF_AC = 0x00040000, /* alignment */
+ EF_VIF = 0x00080000, /* virtual interrupt */
+ EF_VIP = 0x00100000, /* virtual interrupt pending */
+ EF_ID = 0x00200000, /* id */
+};
+
+#ifdef __KERNEL__
+#define user_mode(regs) ((3 & (regs)->xcs))
+#define instruction_pointer(regs) ((regs)->eip)
+extern void show_regs(struct pt_regs *);
+#endif
+
+#endif
diff --git a/xen/include/asm-i386/rwlock.h b/xen/include/asm-i386/rwlock.h
new file mode 100644
index 0000000000..9475419f95
--- /dev/null
+++ b/xen/include/asm-i386/rwlock.h
@@ -0,0 +1,83 @@
+/* include/asm-i386/rwlock.h
+ *
+ * Helpers used by both rw spinlocks and rw semaphores.
+ *
+ * Based in part on code from semaphore.h and
+ * spinlock.h Copyright 1996 Linus Torvalds.
+ *
+ * Copyright 1999 Red Hat, Inc.
+ *
+ * Written by Benjamin LaHaise.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_I386_RWLOCK_H
+#define _ASM_I386_RWLOCK_H
+
+#define RW_LOCK_BIAS 0x01000000
+#define RW_LOCK_BIAS_STR "0x01000000"
+
+#define __build_read_lock_ptr(rw, helper) \
+ asm volatile(LOCK "subl $1,(%0)\n\t" \
+ "js 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tcall " helper "\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ ::"a" (rw) : "memory")
+
+#define __build_read_lock_const(rw, helper) \
+ asm volatile(LOCK "subl $1,%0\n\t" \
+ "js 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tpushl %%eax\n\t" \
+ "leal %0,%%eax\n\t" \
+ "call " helper "\n\t" \
+ "popl %%eax\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_read_lock(rw, helper) do { \
+ if (__builtin_constant_p(rw)) \
+ __build_read_lock_const(rw, helper); \
+ else \
+ __build_read_lock_ptr(rw, helper); \
+ } while (0)
+
+#define __build_write_lock_ptr(rw, helper) \
+ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+ "jnz 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tcall " helper "\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ ::"a" (rw) : "memory")
+
+#define __build_write_lock_const(rw, helper) \
+ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+ "jnz 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tpushl %%eax\n\t" \
+ "leal %0,%%eax\n\t" \
+ "call " helper "\n\t" \
+ "popl %%eax\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_write_lock(rw, helper) do { \
+ if (__builtin_constant_p(rw)) \
+ __build_write_lock_const(rw, helper); \
+ else \
+ __build_write_lock_ptr(rw, helper); \
+ } while (0)
+
+#endif
diff --git a/xen/include/asm-i386/scatterlist.h b/xen/include/asm-i386/scatterlist.h
new file mode 100644
index 0000000000..9d858415db
--- /dev/null
+++ b/xen/include/asm-i386/scatterlist.h
@@ -0,0 +1,16 @@
+#ifndef _I386_SCATTERLIST_H
+#define _I386_SCATTERLIST_H
+
+struct scatterlist {
+ char * address; /* Location data is to be transferred to, NULL for
+ * highmem page */
+ struct pfn_info * page; /* Location for highmem page, if any */
+ unsigned int offset;/* for highmem, page offset */
+
+ dma_addr_t dma_address;
+ unsigned int length;
+};
+
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#endif /* !(_I386_SCATTERLIST_H) */
diff --git a/xen/include/asm-i386/smp.h b/xen/include/asm-i386/smp.h
new file mode 100644
index 0000000000..cfec568c43
--- /dev/null
+++ b/xen/include/asm-i386/smp.h
@@ -0,0 +1,92 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#ifndef __ASSEMBLY__
+#include <xeno/config.h>
+#include <asm/ptrace.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#endif
+
+#ifdef CONFIG_SMP
+#define TARGET_CPUS cpu_online_map
+#else
+#define TARGET_CPUS 0x01
+#endif
+
+#ifdef CONFIG_SMP
+#ifndef __ASSEMBLY__
+
+/*
+ * Private routines/data
+ */
+
+extern void smp_alloc_memory(void);
+extern unsigned long phys_cpu_present_map;
+extern unsigned long cpu_online_map;
+extern volatile unsigned long smp_invalidate_needed;
+extern int pic_mode;
+extern void smp_flush_tlb(void);
+extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
+extern void smp_invalidate_rcv(void); /* Process an NMI */
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings (void);
+
+/*
+ * On x86 all CPUs are mapped 1:1 to the APIC space.
+ * This simplifies scheduling and IPI sending and
+ * compresses data structures.
+ */
+static inline int cpu_logical_map(int cpu)
+{
+ return cpu;
+}
+static inline int cpu_number_map(int cpu)
+{
+ return cpu;
+}
+
+/*
+ * Some lowlevel functions might want to know about
+ * the real APIC ID <-> CPU # mapping.
+ */
+#define MAX_APICID 256
+extern volatile int cpu_to_physical_apicid[NR_CPUS];
+extern volatile int physical_apicid_to_cpu[MAX_APICID];
+extern volatile int cpu_to_logical_apicid[NR_CPUS];
+extern volatile int logical_apicid_to_cpu[MAX_APICID];
+
+/*
+ * General functions that each host system must provide.
+ */
+
+extern void smp_boot_cpus(void);
+extern void smp_store_cpu_info(int id); /* Store per-CPU info (like the initial udelay numbers) */
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+
+#define smp_processor_id() (current->processor)
+
+static __inline int hard_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+}
+
+static __inline int logical_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
+#endif
diff --git a/xen/include/asm-i386/smpboot.h b/xen/include/asm-i386/smpboot.h
new file mode 100644
index 0000000000..ece215fab0
--- /dev/null
+++ b/xen/include/asm-i386/smpboot.h
@@ -0,0 +1,121 @@
+#ifndef __ASM_SMPBOOT_H
+#define __ASM_SMPBOOT_H
+
+/* enum for clustered_apic_mode values */
+enum {
+ CLUSTERED_APIC_NONE = 0,
+ CLUSTERED_APIC_XAPIC,
+ CLUSTERED_APIC_NUMAQ
+};
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+extern unsigned int apic_broadcast_id;
+extern unsigned char clustered_apic_mode;
+extern unsigned char esr_disable;
+extern unsigned char int_delivery_mode;
+extern unsigned int int_dest_addr_mode;
+extern int cyclone_setup(char*);
+
+static inline void detect_clustered_apic(char* oem, char* prod)
+{
+ /*
+ * Can't recognize Summit xAPICs at present, so use the OEM ID.
+ */
+ if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){
+ clustered_apic_mode = CLUSTERED_APIC_XAPIC;
+ apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
+ int_dest_addr_mode = APIC_DEST_PHYSICAL;
+ int_delivery_mode = dest_Fixed;
+ esr_disable = 1;
+ /*Start cyclone clock*/
+ cyclone_setup(0);
+ }
+ else if (!strncmp(oem, "IBM NUMA", 8)){
+ clustered_apic_mode = CLUSTERED_APIC_NUMAQ;
+ apic_broadcast_id = APIC_BROADCAST_ID_APIC;
+ int_dest_addr_mode = APIC_DEST_LOGICAL;
+ int_delivery_mode = dest_LowestPrio;
+ esr_disable = 1;
+ }
+}
+#define INT_DEST_ADDR_MODE (int_dest_addr_mode)
+#define INT_DELIVERY_MODE (int_delivery_mode)
+#else /* CONFIG_X86_CLUSTERED_APIC */
+#define apic_broadcast_id (APIC_BROADCAST_ID_APIC)
+#define clustered_apic_mode (CLUSTERED_APIC_NONE)
+#define esr_disable (0)
+#define detect_clustered_apic(x,y)
+#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */
+#define INT_DELIVERY_MODE (dest_LowestPrio)
+#endif /* CONFIG_X86_CLUSTERED_APIC */
+#define BAD_APICID 0xFFu
+
+#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467)
+#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469)
+
+#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid)
+
+extern unsigned char raw_phys_apicid[NR_CPUS];
+
+/*
+ * How to map from the cpu_present_map
+ */
+static inline int cpu_present_to_apicid(int mps_cpu)
+{
+ if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
+ return raw_phys_apicid[mps_cpu];
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ return (mps_cpu/4)*16 + (1<<(mps_cpu%4));
+ return mps_cpu;
+}
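+
+/*
+ * Illustrative example (not in the original source): on NUMA-Q each quad
+ * of four CPUs occupies one APIC cluster, so mps_cpu 5 (the second CPU of
+ * quad 1) maps to (5/4)*16 + (1 << (5%4)) == 16 + 2 == 18.
+ */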
+
+static inline unsigned long apicid_to_phys_cpu_present(int apicid)
+{
+ if(clustered_apic_mode)
+ return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3));
+ return 1UL << apicid;
+}
+
+#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) )
+
+/*
+ * Mappings between logical cpu number and logical / physical apicid
+ * The first four macros are trivial, but it keeps the abstraction consistent
+ */
+extern volatile int logical_apicid_2_cpu[];
+extern volatile int cpu_2_logical_apicid[];
+extern volatile int physical_apicid_2_cpu[];
+extern volatile int cpu_2_physical_apicid[];
+
+#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
+#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu]
+#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
+#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu]
+#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */
+#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu]
+#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */
+#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu]
+#endif /* CONFIG_MULTIQUAD */
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+static inline int target_cpus(void)
+{
+ static int cpu;
+ switch(clustered_apic_mode){
+ case CLUSTERED_APIC_NUMAQ:
+ /* Broadcast intrs to local quad only. */
+ return APIC_BROADCAST_ID_APIC;
+ case CLUSTERED_APIC_XAPIC:
+ /* Round-robin the interrupts. */
+ cpu = (cpu+1)%smp_num_cpus;
+ return cpu_to_physical_apicid(cpu);
+ default:
+ break;
+ }
+ return cpu_online_map;
+}
+#else
+#define target_cpus() (0x01)
+#endif
+#endif
diff --git a/xen/include/asm-i386/softirq.h b/xen/include/asm-i386/softirq.h
new file mode 100644
index 0000000000..254224411b
--- /dev/null
+++ b/xen/include/asm-i386/softirq.h
@@ -0,0 +1,48 @@
+#ifndef __ASM_SOFTIRQ_H
+#define __ASM_SOFTIRQ_H
+
+#include <asm/atomic.h>
+#include <asm/hardirq.h>
+
+#define __cpu_bh_enable(cpu) \
+ do { barrier(); local_bh_count(cpu)--; } while (0)
+#define cpu_bh_disable(cpu) \
+ do { local_bh_count(cpu)++; barrier(); } while (0)
+
+#define local_bh_disable() cpu_bh_disable(smp_processor_id())
+#define __local_bh_enable() __cpu_bh_enable(smp_processor_id())
+
+#define in_softirq() (local_bh_count(smp_processor_id()) != 0)
+
+/*
+ * NOTE: this assembly code assumes:
+ *
+ * (char *)&local_bh_count - 8 == (char *)&softirq_pending
+ *
+ * If you change the offsets in irq_stat then you have to
+ * update this code as well.
+ */
+#define local_bh_enable() \
+do { \
+ unsigned int *ptr = &local_bh_count(smp_processor_id()); \
+ \
+ barrier(); \
+ if (!--*ptr) \
+ __asm__ __volatile__ ( \
+ "cmpl $0, -8(%0);" \
+ "jnz 2f;" \
+ "1:;" \
+ \
+ ".section .text.lock,\"ax\";" \
+ "2: pushl %%eax; pushl %%ecx; pushl %%edx;" \
+ "call %c1;" \
+ "popl %%edx; popl %%ecx; popl %%eax;" \
+ "jmp 1b;" \
+ ".previous;" \
+ \
+ : /* no output */ \
+ : "r" (ptr), "i" (do_softirq) \
+ /* no registers clobbered */ ); \
+} while (0)
+
+#endif /* __ASM_SOFTIRQ_H */
diff --git a/xen/include/asm-i386/spinlock.h b/xen/include/asm-i386/spinlock.h
new file mode 100644
index 0000000000..59dc7b209f
--- /dev/null
+++ b/xen/include/asm-i386/spinlock.h
@@ -0,0 +1,206 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <xeno/config.h>
+#include <xeno/lib.h>
+
+#if 0
+#define SPINLOCK_DEBUG 1
+#else
+#define SPINLOCK_DEBUG 0
+#endif
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+ volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+ unsigned magic;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#if SPINLOCK_DEBUG
+#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0)
+#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
+ "js 2f\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\t" \
+ "cmpb $0,%0\n\t" \
+ "rep;nop\n\t" \
+ "jle 2b\n\t" \
+ "jmp 1b\n" \
+ ".previous"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+#define spin_unlock_string \
+ "movb $1,%0" \
+ :"=m" (lock->lock) : : "memory"
+
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+ if (lock->magic != SPINLOCK_MAGIC)
+ BUG();
+ if (!spin_is_locked(lock))
+ BUG();
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#else
+
+#define spin_unlock_string \
+ "xchgb %b0, %1" \
+ :"=q" (oldval), "=m" (lock->lock) \
+ :"0" (oldval) : "memory"
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+ char oldval = 1;
+#if SPINLOCK_DEBUG
+ if (lock->magic != SPINLOCK_MAGIC)
+ BUG();
+ if (!spin_is_locked(lock))
+ BUG();
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#endif
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ char oldval;
+ __asm__ __volatile__(
+ "xchgb %b0,%1"
+ :"=q" (oldval), "=m" (lock->lock)
+ :"0" (0) : "memory");
+ return oldval > 0;
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+ __label__ here;
+here:
+ if (lock->magic != SPINLOCK_MAGIC) {
+printk("eip: %p\n", &&here);
+ BUG();
+ }
+#endif
+ __asm__ __volatile__(
+ spin_lock_string
+ :"=m" (lock->lock) : : "memory");
+}
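+
+#if 0
+/* Illustrative usage sketch (not in the original source): a lock-protected
+ * critical section around a hypothetical shared counter: */
+static int example_counter;
+static spinlock_t example_lock = SPIN_LOCK_UNLOCKED;
+static void example_critical_section(void)
+{
+    spin_lock(&example_lock);
+    example_counter++;          /* at most one CPU executes this at a time */
+    spin_unlock(&example_lock);
+}
+#endif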
+
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+ volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+ unsigned magic;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC 0xdeaf1eed
+
+#if SPINLOCK_DEBUG
+#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores. See
+ * semaphore.h for details. -ben
+ */
+/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
+
+static inline void read_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+ if (rw->magic != RWLOCK_MAGIC)
+ BUG();
+#endif
+ __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void write_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+ if (rw->magic != RWLOCK_MAGIC)
+ BUG();
+#endif
+ __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int write_trylock(rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+ return 1;
+ atomic_add(RW_LOCK_BIAS, count);
+ return 0;
+}
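+
+#if 0
+/* Illustrative usage sketch (not in the original source): any number of
+ * CPUs may hold the read lock at once; a writer excludes everyone: */
+static rwlock_t example_rwlock = RW_LOCK_UNLOCKED;
+static int example_read(int *shared)
+{
+    int v;
+    read_lock(&example_rwlock);
+    v = *shared;
+    read_unlock(&example_rwlock);
+    return v;
+}
+static void example_write(int *shared, int v)
+{
+    write_lock(&example_rwlock);
+    *shared = v;
+    write_unlock(&example_rwlock);
+}
+#endif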
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/xen/include/asm-i386/system.h b/xen/include/asm-i386/system.h
new file mode 100644
index 0000000000..1ccce595d8
--- /dev/null
+++ b/xen/include/asm-i386/system.h
@@ -0,0 +1,354 @@
+#ifndef __ASM_SYSTEM_H
+#define __ASM_SYSTEM_H
+
+#include <xeno/config.h>
+#include <asm/bitops.h>
+
+struct task_struct; /* one of the stranger aspects of C forward declarations.. */
+extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
+
+#define prepare_to_switch() do { } while(0)
+#define switch_to(prev,next) do { \
+ asm volatile("pushl %%ebp\n\t" \
+ "pushl %%ebx\n\t" \
+ "pushl %%esi\n\t" \
+ "pushl %%edi\n\t" \
+ "movl %%esp,%0\n\t" /* save ESP */ \
+ "cli\n\t" \
+ "movl %2,%%esp\n\t" /* restore ESP */ \
+ "movl %6,%%cr3\n\t" /* restore pagetables */\
+ "sti\n\t" \
+ "movl $1f,%1\n\t" /* save EIP */ \
+ "pushl %3\n\t" /* restore EIP */ \
+ "jmp __switch_to\n" \
+ "1:\t" \
+ "popl %%edi\n\t" \
+ "popl %%esi\n\t" \
+ "popl %%ebx\n\t" \
+ "popl %%ebp\n\t" \
+ :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \
+ :"m" (next->thread.esp),"m" (next->thread.eip), \
+ "a" (prev), "d" (next), \
+ "c" (pagetable_val(next->mm.pagetable)) \
+ :"memory"); \
+} while (0)
+
+#define _set_base(addr,base) do { unsigned long __pr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+ "rorl $16,%%edx\n\t" \
+ "movb %%dl,%2\n\t" \
+ "movb %%dh,%3" \
+ :"=&d" (__pr) \
+ :"m" (*((addr)+2)), \
+ "m" (*((addr)+4)), \
+ "m" (*((addr)+7)), \
+ "0" (base) \
+ ); } while(0)
+
+#define _set_limit(addr,limit) do { unsigned long __lr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+ "rorl $16,%%edx\n\t" \
+ "movb %2,%%dh\n\t" \
+ "andb $0xf0,%%dh\n\t" \
+ "orb %%dh,%%dl\n\t" \
+ "movb %%dl,%2" \
+ :"=&d" (__lr) \
+ :"m" (*(addr)), \
+ "m" (*((addr)+6)), \
+ "0" (limit) \
+ ); } while(0)
+
+#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
+#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 )
+
+static inline unsigned long _get_base(char * addr)
+{
+ unsigned long __base;
+ __asm__("movb %3,%%dh\n\t"
+ "movb %2,%%dl\n\t"
+ "shll $16,%%edx\n\t"
+ "movw %1,%%dx"
+ :"=&d" (__base)
+ :"m" (*((addr)+2)),
+ "m" (*((addr)+4)),
+ "m" (*((addr)+7)));
+ return __base;
+}
+
+#define get_base(ldt) _get_base( ((char *)&(ldt)) )
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value) \
+ asm volatile("\n" \
+ "1:\t" \
+ "movl %0,%%" #seg "\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3:\t" \
+ "pushl $0\n\t" \
+ "popl %%" #seg "\n\t" \
+ "jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ ".align 4\n\t" \
+ ".long 1b,3b\n" \
+ ".previous" \
+ : :"m" (*(unsigned int *)&(value)))
+
+/*
+ * Clear and set 'TS' bit respectively
+ */
+#define clts() __asm__ __volatile__ ("clts")
+#define read_cr0() ({ \
+ unsigned int __dummy; \
+ __asm__( \
+ "movl %%cr0,%0\n\t" \
+ :"=r" (__dummy)); \
+ __dummy; \
+})
+#define write_cr0(x) \
+ __asm__("movl %0,%%cr0": :"r" (x));
+
+#define read_cr4() ({ \
+ unsigned int __dummy; \
+ __asm__( \
+ "movl %%cr4,%0\n\t" \
+ :"=r" (__dummy)); \
+ __dummy; \
+})
+#define write_cr4(x) \
+ __asm__("movl %0,%%cr4": :"r" (x));
+#define stts() write_cr0(8 | read_cr0())
+
+#define wbinvd() \
+ __asm__ __volatile__ ("wbinvd": : :"memory");
+
+static inline unsigned long get_limit(unsigned long segment)
+{
+ unsigned long __limit;
+ __asm__("lsll %1,%0"
+ :"=r" (__limit):"r" (segment));
+ return __limit+1;
+}
+
+#define nop() __asm__ __volatile__ ("nop")
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+
+#define tas(ptr) (xchg((ptr),1))
+
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((struct __xchg_dummy *)(x))
+
+
+/*
+ * The semantics of CMPXCHG8B are a bit strange, which is why
+ * there is a loop and the loading of %%eax and %%edx has to
+ * be inside. This inlines well in most cases; the cached
+ * cost is around ~38 cycles. (In the future we might want
+ * to do a SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
+ * might have an implicit FPU save as a cost, so it's not
+ * clear which path to go.)
+ */
+static inline void __set_64bit (unsigned long long * ptr,
+ unsigned int low, unsigned int high)
+{
+ __asm__ __volatile__ (
+ "\n1:\t"
+ "movl (%0), %%eax\n\t"
+ "movl 4(%0), %%edx\n\t"
+ "cmpxchg8b (%0)\n\t"
+ "jnz 1b"
+ : /* no outputs */
+ : "D"(ptr),
+ "b"(low),
+ "c"(high)
+ : "ax","dx","memory");
+}
+
+static inline void __set_64bit_constant (unsigned long long *ptr,
+ unsigned long long value)
+{
+ __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
+}
+#define ll_low(x) *(((unsigned int*)&(x))+0)
+#define ll_high(x) *(((unsigned int*)&(x))+1)
+
+static inline void __set_64bit_var (unsigned long long *ptr,
+ unsigned long long value)
+{
+ __set_64bit(ptr,ll_low(value), ll_high(value));
+}
+
+#define set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit_constant(ptr, value) : \
+ __set_64bit_var(ptr, value) )
+
+#define _set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
+ __set_64bit(ptr, ll_low(value), ll_high(value)) )
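+
+#if 0
+/* Illustrative sketch (not in the original source): atomically publish a
+ * full 64-bit value so that concurrent readers never observe a torn
+ * half-write: */
+static unsigned long long example_stamp;
+static void example_publish(unsigned long long v)
+{
+    set_64bit(&example_stamp, v);
+}
+#endif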
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+ * Note 2: xchg has side effect, so that attribute volatile is necessary,
+ * but generally the primitive is invalid, *ptr is output argument. --ANK
+ */
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("xchgb %b0,%1"
+ :"=q" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 2:
+ __asm__ __volatile__("xchgw %w0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 4:
+ __asm__ __volatile__("xchgl %0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ }
+ return x;
+}
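+
+#if 0
+/* Illustrative sketch (not in the original source): xchg() atomically
+ * stores the new value and returns the old one, e.g. to claim a flag: */
+static volatile unsigned long example_flag;
+static int example_try_claim(void)
+{
+    return xchg(&example_flag, 1) == 0; /* non-zero iff we set it first */
+}
+#endif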
+
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#ifdef CONFIG_X86_CMPXCHG
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ }
+ return old;
+}
+
+#define cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
+
+#else
+/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */
+#endif
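+
+#if 0
+/* Illustrative sketch (not in the original source), assuming
+ * CONFIG_X86_CMPXCHG: a lock-free increment built on cmpxchg(): */
+static inline void example_atomic_inc(volatile unsigned long *v)
+{
+    unsigned long old;
+    do {
+        old = *v;
+    } while (cmpxchg(v, old, old + 1) != old);
+}
+#endif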
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPU's follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPU's to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ *
+ * Some non intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+
+#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define rmb() mb()
+
+#ifdef CONFIG_X86_OOSTORE
+#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#else
+#define wmb() __asm__ __volatile__ ("": : :"memory")
+#endif
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#endif
+
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+
+/* interrupt control.. */
+#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
+#define __restore_flags(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
+#define __cli() __asm__ __volatile__("cli": : :"memory")
+#define __sti() __asm__ __volatile__("sti": : :"memory")
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
+
+/* For spinlocks etc */
+#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+#define local_irq_restore(x) __restore_flags(x)
+#define local_irq_disable() __cli()
+#define local_irq_enable() __sti()
+
+#ifdef CONFIG_SMP
+
+extern void __global_cli(void);
+extern void __global_sti(void);
+extern unsigned long __global_save_flags(void);
+extern void __global_restore_flags(unsigned long);
+#define cli() __global_cli()
+#define sti() __global_sti()
+#define save_flags(x) ((x)=__global_save_flags())
+#define restore_flags(x) __global_restore_flags(x)
+
+#else
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+
+#endif
+
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
+void disable_hlt(void);
+void enable_hlt(void);
+
+#define BROKEN_ACPI_Sx 0x0001
+#define BROKEN_INIT_AFTER_S1 0x0002
+
+#endif
diff --git a/xen/include/asm-i386/time.h b/xen/include/asm-i386/time.h
new file mode 100644
index 0000000000..2f834908a7
--- /dev/null
+++ b/xen/include/asm-i386/time.h
@@ -0,0 +1,49 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: time.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: Architecture dependent definition of time variables
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _ASM_TIME_H_
+#define _ASM_TIME_H_
+
+#include <asm/types.h>
+#include <asm/msr.h>
+
+/*
+ * Cycle Counter Time
+ */
+typedef u64 cc_time_t;
+static inline cc_time_t get_cc_time(void)
+{
+ u64 ret;
+ rdtscll(ret);
+ return ret;
+}
+
+/*
+ * System Time
+ */
+typedef s64 s_time_t; /* System time */
+extern u32 stime_pcc; /* cycle counter value at last timer irq */
+extern s_time_t stime_now; /* time in ns at last timer IRQ */
+
+/*
+ * Domain Virtual Time
+ */
+typedef u64 dv_time_t;
+
+#endif /* _ASM_TIME_H_ */
diff --git a/xen/include/asm-i386/timex.h b/xen/include/asm-i386/timex.h
new file mode 100644
index 0000000000..3eeb5d2b70
--- /dev/null
+++ b/xen/include/asm-i386/timex.h
@@ -0,0 +1,58 @@
+/*
+ * linux/include/asm-i386/timex.h
+ *
+ * i386 architecture timex specifications
+ */
+#ifndef _ASMi386_TIMEX_H
+#define _ASMi386_TIMEX_H
+
+#include <linux/config.h>
+#include <asm/msr.h>
+
+#ifdef CONFIG_MELAN
+# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */
+#else
+# define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
+#endif
+
+#define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */
+#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \
+ (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \
+ << (SHIFT_SCALE-SHIFT_HZ)) / HZ)
+
+/*
+ * Standard way to access the cycle counter on i586+ CPUs.
+ * Currently only used on SMP.
+ *
+ * If you really have a SMP machine with i486 chips or older,
+ * compile for that, and this will just always return zero.
+ * That's ok, it just means that the nicer scheduling heuristics
+ * won't work for you.
+ *
+ * We only use the low 32 bits, and we'd simply better make sure
+ * that we reschedule before that wraps. Scheduling at least every
+ * four billion cycles just basically sounds like a good idea,
+ * regardless of how fast the machine is.
+ */
+typedef unsigned long long cycles_t;
+
+extern cycles_t cacheflush_time;
+
+static inline cycles_t get_cycles (void)
+{
+#ifndef CONFIG_X86_TSC
+ return 0;
+#else
+ unsigned long long ret;
+
+ rdtscll(ret);
+ return ret;
+#endif
+}
+
+extern unsigned long cpu_khz;
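+
+/*
+ * Conversion sketch: cpu_khz is cycles per millisecond, so an elapsed
+ * cycle count converts to microseconds as (assuming the TSC is present):
+ *
+ *     cycles_t c0 = get_cycles();
+ *     ...
+ *     unsigned long us = (unsigned long)((get_cycles() - c0) * 1000 / cpu_khz);
+ */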
+
+#define vxtime_lock() do {} while (0)
+#define vxtime_unlock() do {} while (0)
+
+#endif
diff --git a/xen/include/asm-i386/types.h b/xen/include/asm-i386/types.h
new file mode 100644
index 0000000000..2bd0f258b9
--- /dev/null
+++ b/xen/include/asm-i386/types.h
@@ -0,0 +1,50 @@
+#ifndef _I386_TYPES_H
+#define _I386_TYPES_H
+
+typedef unsigned short umode_t;
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#endif
+
+#include <xeno/config.h>
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+#define BITS_PER_LONG 32
+
+/* DMA addresses come in generic and 64-bit flavours. */
+
+#ifdef CONFIG_HIGHMEM
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif
+typedef u64 dma64_addr_t;
+
+#endif
diff --git a/xen/include/asm-i386/uaccess.h b/xen/include/asm-i386/uaccess.h
new file mode 100644
index 0000000000..ba19cfb2b3
--- /dev/null
+++ b/xen/include/asm-i386/uaccess.h
@@ -0,0 +1,600 @@
+#ifndef __i386_UACCESS_H
+#define __i386_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/prefetch.h>
+#include <asm/page.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed; with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+
+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current->addr_limit)
+#define set_fs(x) (current->addr_limit = (x))
+
+#define segment_eq(a,b) ((a).seg == (b).seg)
+
+extern int __verify_write(const void *, unsigned long);
+
+#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg))
+
+/*
+ * Uhhuh, this needs 33-bit arithmetic. We have a carry..
+ */
+#define __range_ok(addr,size) ({ \
+ unsigned long flag,sum; \
+ asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \
+ :"=&r" (flag), "=r" (sum) \
+ :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \
+ flag; })
+
+#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
+
+static inline int verify_area(int type, const void * addr, unsigned long size)
+{
+ return access_ok(type,addr,size) ? 0 : -EFAULT;
+}
+
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+ unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise. */
+extern unsigned long search_exception_table(unsigned long);
+
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ *
+ * The "__xxx" versions of the user access functions are versions that
+ * do not verify the address space, that must have been done previously
+ * with a separate "access_ok()" call (this is used when we do multiple
+ * accesses to the same area of user memory).
+ */
+
+extern void __get_user_1(void);
+extern void __get_user_2(void);
+extern void __get_user_4(void);
+
+#define __get_user_x(size,ret,x,ptr) \
+ __asm__ __volatile__("call __get_user_" #size \
+ :"=a" (ret),"=d" (x) \
+ :"0" (ptr))
+
+/* Careful: we have to cast the result to the type of the pointer for sign reasons */
+#define get_user(x,ptr) \
+({ int __ret_gu=1; long long __val_gu=0;				\
+ switch(sizeof (*(ptr))) { \
+ case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1); break; \
+ case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2); break; \
+ case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4); break; \
+ default: __ret_gu=copy_from_user(&__val_gu,ptr,8); break; \
+ /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \
+ /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \
+ /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \
+ /*default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;*/ \
+ } \
+ (x) = (__typeof__(*(ptr)))__val_gu; \
+ __ret_gu; \
+})
+
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+extern void __put_user_bad(void);
+
+#define put_user(x,ptr) \
+ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __get_user(x,ptr) \
+ __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+#define __put_user(x,ptr) \
+ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
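+
+/*
+ * Usage sketch: get_user/put_user return zero on success and nonzero on
+ * fault, so callers test the result, not the fetched value ('uptr' is a
+ * hypothetical user/guest virtual address):
+ *
+ *     int v;
+ *     if (get_user(v, (int *)uptr))
+ *         return -EFAULT;
+ *     if (put_user(v + 1, (int *)uptr))
+ *         return -EFAULT;
+ */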
+
+#define __put_user_nocheck(x,ptr,size) \
+({ \
+ long __pu_err; \
+ __put_user_size((x),(ptr),(size),__pu_err); \
+ __pu_err; \
+})
+
+
+#define __put_user_check(x,ptr,size) \
+({ \
+ long __pu_err = -EFAULT; \
+ __typeof__(*(ptr)) *__pu_addr = (ptr); \
+ if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
+ __put_user_size((x),__pu_addr,(size),__pu_err); \
+ __pu_err; \
+})
+
+#define __put_user_u64(x, addr, err) \
+ __asm__ __volatile__( \
+ "1: movl %%eax,0(%2)\n" \
+ "2: movl %%edx,4(%2)\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: movl %3,%0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,4b\n" \
+ " .long 2b,4b\n" \
+ ".previous" \
+ : "=r"(err) \
+ : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
+
+#define __put_user_size(x,ptr,size,retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \
+ case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \
+ case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \
+ case 8: __put_user_u64(x,ptr,retval); break; \
+ default: __put_user_bad(); \
+ } \
+} while (0)
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct *)(x))
+
+/*
+ * Tell gcc we read from memory instead of writing: this is because
+ * we do not write to any memory gcc knows about, so there are no
+ * aliasing issues.
+ */
+#define __put_user_asm(x, addr, err, itype, rtype, ltype) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %"rtype"1,%2\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %3,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,3b\n" \
+ ".previous" \
+ : "=r"(err) \
+ : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+#define __get_user_nocheck(x,ptr,size) \
+({ \
+ long __gu_err, __gu_val; \
+ __get_user_size(__gu_val,(ptr),(size),__gu_err); \
+ (x) = (__typeof__(*(ptr)))__gu_val; \
+ __gu_err; \
+})
+
+extern long __get_user_bad(void);
+
+#define __get_user_size(x,ptr,size,retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \
+ case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \
+ case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \
+ default: (x) = __get_user_bad(); \
+ } \
+} while (0)
+
+#define __get_user_asm(x, addr, err, itype, rtype, ltype) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %2,%"rtype"1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %3,%0\n" \
+ " xor"itype" %"rtype"1,%"rtype"1\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,3b\n" \
+ ".previous" \
+ : "=r"(err), ltype (x) \
+ : "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+/*
+ * Copy To/From Userspace
+ */
+
+/* Generic arbitrary sized copy. */
+#define __copy_user(to,from,size) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ " movl %3,%0\n" \
+ "1: rep; movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: lea 0(%3,%0,4),%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,2b\n" \
+ ".previous" \
+ : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
+ : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
+ : "memory"); \
+} while (0)
+
+#define __copy_user_zeroing(to,from,size) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ " movl %3,%0\n" \
+ "1: rep; movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: lea 0(%3,%0,4),%0\n" \
+ "4: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosb\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
+ : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
+ : "memory"); \
+} while (0)
+
+/* We make the __ versions of copy_from/to_user inline, because they're often
+ * used in fast paths and have only a small space overhead.
+ */
+static inline unsigned long
+__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __copy_user_zeroing(to,from,n);
+ return n;
+}
+
+static inline unsigned long
+__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __copy_user(to,from,n);
+ return n;
+}
+
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user(to, from, size) \
+do { \
+ int __d0, __d1; \
+ switch (size & 3) { \
+ default: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "2: shl $2,%0\n" \
+ " jmp 1b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,2b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 1: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: shl $2,%0\n" \
+ "4: incl %0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 2: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: shl $2,%0\n" \
+ "4: addl $2,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 3: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2: movsb\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: shl $2,%0\n" \
+ "5: addl $2,%0\n" \
+ "6: incl %0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,4b\n" \
+ " .long 1b,5b\n" \
+ " .long 2b,6b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user_zeroing(to, from, size) \
+do { \
+ int __d0, __d1; \
+ switch (size & 3) { \
+ default: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "2: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " jmp 1b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,2b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 1: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " incl %0\n" \
+ " jmp 2b\n" \
+ "4: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " incl %0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 2: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " stosw\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " addl $2,%0\n" \
+ " jmp 2b\n" \
+ "4: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosw\n" \
+ " popl %%eax\n" \
+ " addl $2,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 3: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2: movsb\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " stosw\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " addl $3,%0\n" \
+ " jmp 2b\n" \
+ "5: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosw\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " addl $3,%0\n" \
+ " jmp 2b\n" \
+ "6: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " incl %0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,4b\n" \
+ " .long 1b,5b\n" \
+ " .long 2b,6b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+unsigned long __generic_copy_to_user(void *, const void *, unsigned long);
+unsigned long __generic_copy_from_user(void *, const void *, unsigned long);
+
+static inline unsigned long
+__constant_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ prefetch(from);
+ if (access_ok(VERIFY_WRITE, to, n))
+ __constant_copy_user(to,from,n);
+ return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_READ, from, n))
+ __constant_copy_user_zeroing(to,from,n);
+ else
+ memset(to, 0, n);
+ return n;
+}
+
+static inline unsigned long
+__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __constant_copy_user(to,from,n);
+ return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __constant_copy_user_zeroing(to,from,n);
+ return n;
+}
+
+#define copy_to_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_to_user((to),(from),(n)) : \
+ __generic_copy_to_user((to),(from),(n)))
+
+#define copy_from_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_from_user((to),(from),(n)) : \
+ __generic_copy_from_user((to),(from),(n)))
+
+#define __copy_to_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_to_user_nocheck((to),(from),(n)) : \
+ __generic_copy_to_user_nocheck((to),(from),(n)))
+
+#define __copy_from_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_from_user_nocheck((to),(from),(n)) : \
+ __generic_copy_from_user_nocheck((to),(from),(n)))
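+
+/*
+ * Usage sketch: the copy routines return the number of bytes that could
+ * NOT be copied, so zero means complete success ('foo' and 'uptr' are
+ * hypothetical):
+ *
+ *     struct foo k;
+ *     if (copy_from_user(&k, uptr, sizeof(k)) != 0)
+ *         return -EFAULT;       // partial copy: reject the request
+ */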
+
+long strncpy_from_user(char *dst, const char *src, long count);
+long __strncpy_from_user(char *dst, const char *src, long count);
+#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+long strnlen_user(const char *str, long n);
+unsigned long clear_user(void *mem, unsigned long len);
+unsigned long __clear_user(void *mem, unsigned long len);
+
+#endif /* __i386_UACCESS_H */
diff --git a/xen/include/asm-i386/unaligned.h b/xen/include/asm-i386/unaligned.h
new file mode 100644
index 0000000000..7acd795762
--- /dev/null
+++ b/xen/include/asm-i386/unaligned.h
@@ -0,0 +1,37 @@
+#ifndef __I386_UNALIGNED_H
+#define __I386_UNALIGNED_H
+
+/*
+ * The i386 can do unaligned accesses itself.
+ *
+ * The strange macros are there to make sure these can't
+ * be misused in a way that makes them not work on other
+ * architectures where unaligned accesses aren't as simple.
+ */
+
+/**
+ * get_unaligned - get value from possibly mis-aligned location
+ * @ptr: pointer to value
+ *
+ * This macro should be used for accessing values larger in size than
+ * single bytes at locations that are expected to be improperly aligned,
+ * e.g. retrieving a u16 value from a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define get_unaligned(ptr) (*(ptr))
+
+/**
+ * put_unaligned - put value to a possibly mis-aligned location
+ * @val: value to place
+ * @ptr: pointer to location
+ *
+ * This macro should be used for placing values larger in size than
+ * single bytes at locations that are expected to be improperly aligned,
+ * e.g. writing a u16 value to a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
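+
+/*
+ * Usage sketch: pulling a u16 out of a byte stream at an odd offset
+ * ('pkt' and 'out' are hypothetical buffers):
+ *
+ *     unsigned short proto = get_unaligned((unsigned short *)(pkt + 12));
+ *     put_unaligned(proto, (unsigned short *)(out + 1));
+ */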
+
+#endif
diff --git a/xen/include/hypervisor-ifs/block.h b/xen/include/hypervisor-ifs/block.h
new file mode 100644
index 0000000000..627055bf0b
--- /dev/null
+++ b/xen/include/hypervisor-ifs/block.h
@@ -0,0 +1,78 @@
+/******************************************************************************
+ * block.h
+ *
+ * Block IO communication rings.
+ *
+ * These are the ring data structures for buffering messages between
+ * the hypervisor and guest OSes.
+ *
+ */
+
+#ifndef __BLOCK_H__
+#define __BLOCK_H__
+
+#include <linux/kdev_t.h>
+
+/* the first four definitions match fs.h */
+#define XEN_BLOCK_READ 0
+#define XEN_BLOCK_WRITE 1
+#define XEN_BLOCK_READA 2 /* currently unused */
+#define XEN_BLOCK_SPECIAL 4 /* currently unused */
+#define XEN_BLOCK_PROBE 8 /* determine io configuration from hypervisor */
+#define XEN_BLOCK_DEBUG 16 /* debug */
+
+#define BLK_RING_SIZE 128
+#define BLK_RING_MAX_ENTRIES (BLK_RING_SIZE - 2)
+#define BLK_RING_INC(_i) (((_i)+1) & (BLK_RING_SIZE-1))
+#define BLK_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RING_SIZE-1))
+
+typedef struct blk_ring_req_entry
+{
+ void * id; /* for guest os use */
+ int operation; /* XEN_BLOCK_READ or XEN_BLOCK_WRITE */
+ char * buffer;
+ unsigned long block_number; /* block number */
+ unsigned short block_size; /* block size */
+ kdev_t device;
+ unsigned long sector_number; /* real buffer location on disk */
+} blk_ring_req_entry_t;
+
+typedef struct blk_ring_resp_entry
+{
+ void *id;
+ unsigned long status;
+} blk_ring_resp_entry_t;
+
+typedef struct blk_ring_st
+{
+ unsigned int req_prod; /* Request producer. Updated by guest OS. */
+ unsigned int resp_prod; /* Response producer. Updated by Xen. */
+ union {
+ blk_ring_req_entry_t req;
+ blk_ring_resp_entry_t resp;
+ } ring[BLK_RING_SIZE];
+} blk_ring_t;
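+
+/*
+ * Producer sketch: a guest OS queues a request by filling the slot at
+ * req_prod and then advancing it with BLK_RING_INC ('ring' and 'req' are
+ * hypothetical; a real driver must also bound outstanding requests by
+ * BLK_RING_MAX_ENTRIES):
+ *
+ *     ring->ring[ring->req_prod].req = req;           // copy request in
+ *     ring->req_prod = BLK_RING_INC(ring->req_prod);  // publish to Xen
+ */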
+
+#define MAX_XEN_DISK_COUNT 100
+
+#define XEN_DISK_IDE 1
+#define XEN_DISK_SCSI 2
+
+typedef struct xen_disk /* physical disk */
+{
+ int type; /* disk type */
+ unsigned long capacity;
+ unsigned char heads; /* hdreg.h::hd_geometry */
+ unsigned char sectors; /* hdreg.h::hd_geometry */
+ unsigned int cylinders; /* hdreg.h::hd_big_geometry */
+ unsigned long start; /* hdreg.h::hd_geometry */
+ void * gendisk; /* struct gendisk ptr */
+} xen_disk_t;
+
+typedef struct xen_disk_info
+{
+ int count; /* number of subsequent xen_disk_t structures to follow */
+  xen_disk_t disks[MAX_XEN_DISK_COUNT];
+} xen_disk_info_t;
+
+#endif
diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h
new file mode 100644
index 0000000000..6ecac5848e
--- /dev/null
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h
@@ -0,0 +1,209 @@
+/******************************************************************************
+ * hypervisor-if.h
+ *
+ * Interface to Xeno hypervisor.
+ */
+
+#include "network.h"
+#include "block.h"
+
+#ifndef __HYPERVISOR_IF_H__
+#define __HYPERVISOR_IF_H__
+
+/*
+ * Virtual addresses beyond this are not modifiable by guest OSes.
+ * The machine->physical mapping table starts at this address, read-only
+ * to all domains except DOM0.
+ */
+#define HYPERVISOR_VIRT_START (0xFC000000UL)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+typedef struct trap_info_st
+{
+ unsigned char vector; /* exception/interrupt vector */
+ unsigned char dpl; /* privilege level */
+ unsigned short cs; /* code selector */
+ unsigned long address; /* code address */
+} trap_info_t;
+
+
+typedef struct
+{
+/*
+ * PGREQ_XXX: specified in least-significant bits of 'ptr' field.
+ * All requests specify the relevant PTE or PT address in 'ptr'.
+ * Normal requests specify the update value in 'val'.
+ * Extended requests specify the command in the least 8 bits of 'val'.
+ */
+/* A normal page-table update request. */
+#define PGREQ_NORMAL 0
+/* Update an entry in the machine->physical mapping table. */
+#define PGREQ_MPT_UPDATE 1
+/* An extended command. */
+#define PGREQ_EXTENDED_COMMAND 2
+/* DOM0 can make entirely unchecked updates which do not affect refcnts. */
+#define PGREQ_UNCHECKED_UPDATE 3
+ unsigned long ptr, val; /* *ptr = val */
+/* Announce a new top-level page table. */
+#define PGEXT_PIN_L1_TABLE 0
+#define PGEXT_PIN_L2_TABLE 1
+#define PGEXT_PIN_L3_TABLE 2
+#define PGEXT_PIN_L4_TABLE 3
+#define PGEXT_UNPIN_TABLE 4
+#define PGEXT_NEW_BASEPTR 5
+#define PGEXT_TLB_FLUSH 6
+#define PGEXT_INVLPG 7
+#define PGEXT_CMD_MASK 255
+#define PGEXT_CMD_SHIFT 8
+} page_update_request_t;
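+
+/*
+ * Encoding sketch: a normal PTE update followed by a TLB flush, expressed
+ * as two requests ('pte_ma' and 'new_pte' are hypothetical):
+ *
+ *     req[0].ptr = pte_ma | PGREQ_NORMAL;     // *pte_ma = new_pte
+ *     req[0].val = new_pte;
+ *     req[1].ptr = PGREQ_EXTENDED_COMMAND;    // command lives in the
+ *     req[1].val = PGEXT_TLB_FLUSH;           //  least 8 bits of 'val'
+ */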
+
+
+/*
+ * Segment descriptor tables.
+ */
+/* 8 entries, plus a TSS entry for each CPU (up to 32 CPUs). */
+#define FIRST_DOMAIN_GDT_ENTRY 40
+/* These are flat segments for domain bootstrap and fallback. */
+#define FLAT_RING1_CS 0x11
+#define FLAT_RING1_DS 0x19
+#define FLAT_RING3_CS 0x23
+#define FLAT_RING3_DS 0x2b
+
+
+/* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. */
+
+#define __HYPERVISOR_set_trap_table 0
+#define __HYPERVISOR_pt_update 1
+#define __HYPERVISOR_console_write 2
+#define __HYPERVISOR_set_gdt 3
+#define __HYPERVISOR_stack_and_ldt_switch 4
+#define __HYPERVISOR_net_update 5
+#define __HYPERVISOR_fpu_taskswitch 6
+#define __HYPERVISOR_sched_op 7
+#define __HYPERVISOR_exit 8
+#define __HYPERVISOR_dom0_op 9
+#define __HYPERVISOR_network_op 10
+#define __HYPERVISOR_block_io_op 11
+#define __HYPERVISOR_set_debugreg 12
+#define __HYPERVISOR_get_debugreg 13
+#define __HYPERVISOR_update_descriptor 14
+#define __HYPERVISOR_set_fast_trap 15
+
+#define TRAP_INSTR "int $0x82"
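+
+/*
+ * Invocation sketch: with the register convention above, a guest could
+ * issue a console write roughly as follows ('buf' and 'len' are
+ * hypothetical; the exact stubs live in the guest OS):
+ *
+ *     int ret;
+ *     __asm__ __volatile__ ( TRAP_INSTR
+ *         : "=a" (ret)
+ *         : "0" (__HYPERVISOR_console_write), "b" (buf), "c" (len)
+ *         : "memory" );
+ */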
+
+
+/* Event message note:
+ *
+ * Here, as with interrupts to the guest OS, additional network interfaces
+ * are defined. These definitions serve as placeholders for the event bits;
+ * however, in the code these events will always be referred to as shifted
+ * offsets from the base NET events.
+ */
+
+/* Events that a guest OS may receive from the hypervisor. */
+#define EVENT_BLK_RESP 0x01 /* A block device response has been queued. */
+#define EVENT_TIMER 0x02 /* A timeout has been updated. */
+#define EVENT_DIE 0x04 /* OS is about to be killed. Clean up please! */
+#define EVENT_DEBUG 0x08 /* Request guest to dump debug info (gross!) */
+#define EVENT_NET_TX 0x10 /* There are packets for transmission. */
+#define EVENT_NET_RX 0x20 /* There are empty buffers for receive. */
+
+/* Bit offsets, as opposed to the above masks. */
+#define _EVENT_BLK_RESP 0
+#define _EVENT_TIMER 1
+#define _EVENT_DIE 2
+#define _EVENT_NET_TX 3
+#define _EVENT_NET_RX 4
+#define _EVENT_DEBUG 5
+
+
+/*
+ * NB. We expect that this struct is smaller than a page.
+ */
+typedef struct shared_info_st {
+
+ /* Bitmask of outstanding event notifications hypervisor -> guest OS. */
+ unsigned long events;
+ /*
+ * Hypervisor will only signal event delivery via the "callback
+ * exception" when this value is non-zero. Hypervisor clears this when
+     * notifying the guest OS -- this prevents unbounded reentrancy and
+ * stack overflow (in this way, acts as an interrupt-enable flag).
+ */
+ unsigned long events_enable;
+
+ /*
+ * Address for callbacks hypervisor -> guest OS.
+ * Stack frame looks like that of an interrupt.
+ * Code segment is the default flat selector.
+ * This handler will only be called when events_enable is non-zero.
+ */
+ unsigned long event_address;
+
+ /*
+ * Hypervisor uses this callback when it takes a fault on behalf of
+ * an application. This can happen when returning from interrupts for
+ * example: various faults can occur when reloading the segment
+ * registers, and executing 'iret'.
+ * This callback is provided with an extended stack frame, augmented
+ * with saved values for segment registers %ds and %es:
+ * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss]
+ * Code segment is the default flat selector.
+ * FAULTS WHEN CALLING THIS HANDLER WILL TERMINATE THE DOMAIN!!!
+ */
+ unsigned long failsafe_address;
+
+ /*
+ * Time:
+ * The following abstractions are exposed: System Time, Wall Clock
+ * Time, Domain Virtual Time. Domains can access Cycle counter time
+ * directly.
+ * XXX RN: Need something to pass NTP scaling to GuestOS.
+ */
+
+ u64 cpu_freq; /* to calculate ticks -> real time */
+
+ /* System Time */
+ long long system_time; /* in ns */
+ unsigned long st_timestamp; /* cyclecounter at last update */
+
+ /* Wall Clock Time */
+ u32 wc_version; /* a version number for info below */
+ long tv_sec; /* essentially a struct timeval */
+ long tv_usec;
+ long long wc_timestamp; /* system time at last update */
+
+ /* Domain Virtual Time */
+ unsigned long long domain_time;
+
+ /*
+ * Timeout values:
+ * Allow a domain to specify a timeout value in system time and
+ * domain virtual time.
+ */
+ unsigned long long wall_timeout;
+ unsigned long long domain_timeout;
+
+} shared_info_t;
+
+/*
+ * NB. We expect that this struct is smaller than a page.
+ */
+typedef struct start_info_st {
+ unsigned long nr_pages; /* total pages allocated to this domain */
+ shared_info_t *shared_info; /* VIRTUAL address of shared info struct */
+ unsigned long pt_base; /* VIRTUAL address of page directory */
+ unsigned long mod_start; /* VIRTUAL address of pre-loaded module */
+ unsigned long mod_len; /* size (bytes) of pre-loaded module */
+ net_ring_t *net_rings; /* network rings (VIRTUAL ADDRESS) */
+ int num_net_rings;
+ unsigned long blk_ring; /* block io ring (MACHINE ADDRESS) */
+ unsigned char cmd_line[1]; /* variable-length */
+} start_info_t;
+
+/* For use in guest OSes. */
+extern shared_info_t *HYPERVISOR_shared_info;
+
+#endif /* __HYPERVISOR_IF_H__ */
diff --git a/xen/include/hypervisor-ifs/network.h b/xen/include/hypervisor-ifs/network.h
new file mode 100644
index 0000000000..1e4e7e1c53
--- /dev/null
+++ b/xen/include/hypervisor-ifs/network.h
@@ -0,0 +1,131 @@
+/******************************************************************************
+ * network.h
+ *
+ * ring data structures for buffering messages between hypervisor and
+ * guest OSes. As it stands this is only used for network buffer exchange.
+ *
+ * This file also contains structures and interfaces for the per-domain
+ * routing/filtering tables in the hypervisor.
+ *
+ */
+
+#ifndef __RING_H__
+#define __RING_H__
+
+#include <linux/types.h>
+
+typedef struct tx_entry_st {
+ unsigned long addr; /* machine address of packet */
+ unsigned short size; /* in bytes */
+ unsigned short status; /* per descriptor status. */
+} tx_entry_t;
+
+typedef struct rx_entry_st {
+ unsigned long addr; /* machine address of PTE to swizzle */
+ unsigned short size; /* in bytes */
+ unsigned short status; /* per descriptor status. */
+} rx_entry_t;
+
+#define TX_RING_SIZE 256
+#define RX_RING_SIZE 256
+typedef struct net_ring_st {
+ /*
+ * Guest OS places packets into ring at tx_prod.
+ * Hypervisor removes at tx_cons.
+ * Ring is empty when tx_prod == tx_cons.
+ * Guest OS receives a DOMAIN_EVENT_NET_TX when tx_cons passes tx_event.
+ * Hypervisor may be prodded whenever tx_prod is updated, but this is
+ * only necessary when tx_cons == old_tx_prod (ie. transmitter stalled).
+ */
+ tx_entry_t *tx_ring;
+ unsigned int tx_prod, tx_cons, tx_event;
+
+ /*
+ * Guest OS places empty buffers into ring at rx_prod.
+ * Hypervisor fills buffers as rx_cons.
+ * Ring is empty when rx_prod == rx_cons.
+ * Guest OS receives a DOMAIN_EVENT_NET_RX when rx_cons passes rx_event.
+ * Hypervisor may be prodded whenever rx_prod is updated, but this is
+ * only necessary when rx_cons == old_rx_prod (ie. receiver stalled).
+ */
+ rx_entry_t *rx_ring;
+ unsigned int rx_prod, rx_cons, rx_event;
+} net_ring_t;
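+
+/*
+ * Transmit sketch, following the tx_prod/tx_cons protocol documented
+ * above ('ring', 'pkt_ma', 'pkt_len' and 'notify_hypervisor' are
+ * hypothetical):
+ *
+ *     unsigned int i = ring->tx_prod;
+ *     ring->tx_ring[i].addr = pkt_ma;     // machine address of packet
+ *     ring->tx_ring[i].size = pkt_len;
+ *     ring->tx_prod = (i + 1) % TX_RING_SIZE;
+ *     if (ring->tx_cons == i)             // transmitter had stalled,
+ *         notify_hypervisor();            //  so a prod is required
+ */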
+
+/* Specify base of per-domain array. Get returned free slot in the array. */
+/*net_ring_t *create_net_vif(int domain);*/
+
+/* Packet routing/filtering code follows:
+ */
+
+#define NETWORK_ACTION_ACCEPT 0
+#define NETWORK_ACTION_COUNT 1
+
+#define NETWORK_PROTO_ANY 0
+#define NETWORK_PROTO_IP 1
+#define NETWORK_PROTO_TCP 2
+#define NETWORK_PROTO_UDP 3
+#define NETWORK_PROTO_ARP 4
+
+typedef struct net_rule_st
+{
+ u32 src_addr;
+ u32 dst_addr;
+ u16 src_port;
+ u16 dst_port;
+ u32 src_addr_mask;
+ u32 dst_addr_mask;
+ u16 src_port_mask;
+ u16 dst_port_mask;
+ u16 proto;
+
+ int src_interface;
+ int dst_interface;
+ u16 action;
+} net_rule_t;
+
+typedef struct vif_query_st
+{
+ unsigned int domain;
+ char *buf; // where to put the reply -- guest virtual address
+} vif_query_t;
+
+/* Network trap operations and associated structure.
+ * This presently just handles rule insertion and deletion, but will
+ * eventually have code to add and remove interfaces.
+ */
+
+#define NETWORK_OP_ADDRULE 0
+#define NETWORK_OP_DELETERULE 1
+#define NETWORK_OP_GETRULELIST 2
+#define NETWORK_OP_VIFQUERY 3
+
+typedef struct network_op_st
+{
+ unsigned long cmd;
+ union
+ {
+ net_rule_t net_rule;
+ vif_query_t vif_query;
+    } u;
+} network_op_t;
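+
+/*
+ * Rule sketch: a network_op_t asking the hypervisor to accept TCP
+ * traffic to port 80 (zeroed mask fields act as wildcards):
+ *
+ *     network_op_t op;
+ *     memset(&op, 0, sizeof(op));
+ *     op.cmd = NETWORK_OP_ADDRULE;
+ *     op.u.net_rule.proto         = NETWORK_PROTO_TCP;
+ *     op.u.net_rule.dst_port      = 80;
+ *     op.u.net_rule.dst_port_mask = 0xffff;   // match the port exactly
+ *     op.u.net_rule.action        = NETWORK_ACTION_ACCEPT;
+ */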
+
+typedef struct net_rule_ent_st
+{
+ net_rule_t r;
+ struct net_rule_ent_st *next;
+} net_rule_ent_t;
+
+/* Drop a new rule down to the network tables. */
+int add_net_rule(net_rule_t *rule);
+
+
+/* Descriptor status values:
+ */
+
+#define RING_STATUS_OK 0 // Everything is gravy.
+#define RING_STATUS_ERR_CFU -1 // Copy from user problems.
+#define RING_STATUS_BAD_PAGE -2 // What they gave us was pure evil.
+
+#endif
diff --git a/xen/include/scsi/scsi.h b/xen/include/scsi/scsi.h
new file mode 100644
index 0000000000..ffcb419482
--- /dev/null
+++ b/xen/include/scsi/scsi.h
@@ -0,0 +1,237 @@
+#ifndef _LINUX_SCSI_H
+#define _LINUX_SCSI_H
+
+/*
+ * This header file contains public constants and structures used by
+ * the scsi code for linux.
+ */
+
+/*
+ $Header: /usr/src/linux/include/linux/RCS/scsi.h,v 1.3 1993/09/24 12:20:33 drew Exp $
+
+ For documentation on the OPCODES, MESSAGES, and SENSE values,
+ please consult the SCSI standard.
+
+*/
+
+/*
+ * SCSI opcodes
+ */
+
+#define TEST_UNIT_READY 0x00
+#define REZERO_UNIT 0x01
+#define REQUEST_SENSE 0x03
+#define FORMAT_UNIT 0x04
+#define READ_BLOCK_LIMITS 0x05
+#define REASSIGN_BLOCKS 0x07
+#define READ_6 0x08
+#define WRITE_6 0x0a
+#define SEEK_6 0x0b
+#define READ_REVERSE 0x0f
+#define WRITE_FILEMARKS 0x10
+#define SPACE 0x11
+#define INQUIRY 0x12
+#define RECOVER_BUFFERED_DATA 0x14
+#define MODE_SELECT 0x15
+#define RESERVE 0x16
+#define RELEASE 0x17
+#define COPY 0x18
+#define ERASE 0x19
+#define MODE_SENSE 0x1a
+#define START_STOP 0x1b
+#define RECEIVE_DIAGNOSTIC 0x1c
+#define SEND_DIAGNOSTIC 0x1d
+#define ALLOW_MEDIUM_REMOVAL 0x1e
+
+#define SET_WINDOW 0x24
+#define READ_CAPACITY 0x25
+#define READ_10 0x28
+#define WRITE_10 0x2a
+#define SEEK_10 0x2b
+#define WRITE_VERIFY 0x2e
+#define VERIFY 0x2f
+#define SEARCH_HIGH 0x30
+#define SEARCH_EQUAL 0x31
+#define SEARCH_LOW 0x32
+#define SET_LIMITS 0x33
+#define PRE_FETCH 0x34
+#define READ_POSITION 0x34
+#define SYNCHRONIZE_CACHE 0x35
+#define LOCK_UNLOCK_CACHE 0x36
+#define READ_DEFECT_DATA 0x37
+#define MEDIUM_SCAN 0x38
+#define COMPARE 0x39
+#define COPY_VERIFY 0x3a
+#define WRITE_BUFFER 0x3b
+#define READ_BUFFER 0x3c
+#define UPDATE_BLOCK 0x3d
+#define READ_LONG 0x3e
+#define WRITE_LONG 0x3f
+#define CHANGE_DEFINITION 0x40
+#define WRITE_SAME 0x41
+#define READ_TOC 0x43
+#define LOG_SELECT 0x4c
+#define LOG_SENSE 0x4d
+#define MODE_SELECT_10 0x55
+#define RESERVE_10 0x56
+#define RELEASE_10 0x57
+#define MODE_SENSE_10 0x5a
+#define PERSISTENT_RESERVE_IN 0x5e
+#define PERSISTENT_RESERVE_OUT 0x5f
+#define MOVE_MEDIUM 0xa5
+#define READ_12 0xa8
+#define WRITE_12 0xaa
+#define WRITE_VERIFY_12 0xae
+#define SEARCH_HIGH_12 0xb0
+#define SEARCH_EQUAL_12 0xb1
+#define SEARCH_LOW_12 0xb2
+#define READ_ELEMENT_STATUS 0xb8
+#define SEND_VOLUME_TAG 0xb6
+#define WRITE_LONG_2 0xea
+
+/*
+ * Status codes
+ */
+
+#define GOOD 0x00
+#define CHECK_CONDITION 0x01
+#define CONDITION_GOOD 0x02
+#define BUSY 0x04
+#define INTERMEDIATE_GOOD 0x08
+#define INTERMEDIATE_C_GOOD 0x0a
+#define RESERVATION_CONFLICT 0x0c
+#define COMMAND_TERMINATED 0x11
+#define QUEUE_FULL 0x14
+
+#define STATUS_MASK 0x3e
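+
+/*
+ * Decoding sketch: the status values above are already right-shifted by
+ * one, so a raw SCSI status byte is checked like this ('request_sense'
+ * is a hypothetical recovery path):
+ *
+ *     if (((status & STATUS_MASK) >> 1) == CHECK_CONDITION)
+ *         request_sense();
+ */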
+
+/*
+ * SENSE KEYS
+ */
+
+#define NO_SENSE 0x00
+#define RECOVERED_ERROR 0x01
+#define NOT_READY 0x02
+#define MEDIUM_ERROR 0x03
+#define HARDWARE_ERROR 0x04
+#define ILLEGAL_REQUEST 0x05
+#define UNIT_ATTENTION 0x06
+#define DATA_PROTECT 0x07
+#define BLANK_CHECK 0x08
+#define COPY_ABORTED 0x0a
+#define ABORTED_COMMAND 0x0b
+#define VOLUME_OVERFLOW 0x0d
+#define MISCOMPARE 0x0e
+
+
+/*
+ * DEVICE TYPES
+ */
+
+#define TYPE_DISK 0x00
+#define TYPE_TAPE 0x01
+#define TYPE_PRINTER 0x02
+#define TYPE_PROCESSOR 0x03 /* HP scanners use this */
+#define TYPE_WORM 0x04 /* Treated as ROM by our system */
+#define TYPE_ROM 0x05
+#define TYPE_SCANNER 0x06
+#define TYPE_MOD 0x07 /* Magneto-optical disk -
+ * - treated as TYPE_DISK */
+#define TYPE_MEDIUM_CHANGER 0x08
+#define TYPE_COMM 0x09 /* Communications device */
+#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */
+#define TYPE_NO_LUN 0x7f
+
+/*
+ * standard mode-select header prepended to all mode-select commands
+ *
+ * moved here from cdrom.h -- kraxel
+ */
+
+struct ccs_modesel_head
+{
+ u_char _r1; /* reserved */
+ u_char medium; /* device-specific medium type */
+ u_char _r2; /* reserved */
+ u_char block_desc_length; /* block descriptor length */
+ u_char density; /* device-specific density code */
+ u_char number_blocks_hi; /* number of blocks in this block desc */
+ u_char number_blocks_med;
+ u_char number_blocks_lo;
+ u_char _r3;
+ u_char block_length_hi; /* block length for blocks in this desc */
+ u_char block_length_med;
+ u_char block_length_lo;
+};
+
+/*
+ * MESSAGE CODES
+ */
+
+#define COMMAND_COMPLETE 0x00
+#define EXTENDED_MESSAGE 0x01
+#define EXTENDED_MODIFY_DATA_POINTER 0x00
+#define EXTENDED_SDTR 0x01
+#define EXTENDED_EXTENDED_IDENTIFY 0x02 /* SCSI-I only */
+#define EXTENDED_WDTR 0x03
+#define SAVE_POINTERS 0x02
+#define RESTORE_POINTERS 0x03
+#define DISCONNECT 0x04
+#define INITIATOR_ERROR 0x05
+#define ABORT 0x06
+#define MESSAGE_REJECT 0x07
+#define NOP 0x08
+#define MSG_PARITY_ERROR 0x09
+#define LINKED_CMD_COMPLETE 0x0a
+#define LINKED_FLG_CMD_COMPLETE 0x0b
+#define BUS_DEVICE_RESET 0x0c
+
+#define INITIATE_RECOVERY 0x0f /* SCSI-II only */
+#define RELEASE_RECOVERY 0x10 /* SCSI-II only */
+
+#define SIMPLE_QUEUE_TAG 0x20
+#define HEAD_OF_QUEUE_TAG 0x21
+#define ORDERED_QUEUE_TAG 0x22
+
+/*
+ * Here are some scsi specific ioctl commands which are sometimes useful.
+ */
+/* These are a few other constants only used by scsi devices */
+/* Note that include/linux/cdrom.h also defines IOCTL 0x5300 - 0x5395 */
+
+#define SCSI_IOCTL_GET_IDLUN 0x5382 /* conflicts with CDROMAUDIOBUFSIZ */
+
+/* Used to turn on and off tagged queuing for scsi devices */
+
+#define SCSI_IOCTL_TAGGED_ENABLE 0x5383
+#define SCSI_IOCTL_TAGGED_DISABLE 0x5384
+
+/* Used to obtain the host number of a device. */
+#define SCSI_IOCTL_PROBE_HOST 0x5385
+
+/* Used to get the bus number for a device */
+#define SCSI_IOCTL_GET_BUS_NUMBER 0x5386
+
+/* Used to get the PCI location of a device */
+#define SCSI_IOCTL_GET_PCI 0x5387
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
+
+#endif
diff --git a/xen/include/scsi/scsi_ioctl.h b/xen/include/scsi/scsi_ioctl.h
new file mode 100644
index 0000000000..937cadfb91
--- /dev/null
+++ b/xen/include/scsi/scsi_ioctl.h
@@ -0,0 +1,51 @@
+#ifndef _SCSI_IOCTL_H
+#define _SCSI_IOCTL_H
+
+#define SCSI_IOCTL_SEND_COMMAND 1
+#define SCSI_IOCTL_TEST_UNIT_READY 2
+#define SCSI_IOCTL_BENCHMARK_COMMAND 3
+#define SCSI_IOCTL_SYNC 4 /* Request synchronous parameters */
+#define SCSI_IOCTL_START_UNIT 5
+#define SCSI_IOCTL_STOP_UNIT 6
+/* The door lock/unlock constants are compatible with Sun constants for
+ the cdrom */
+#define SCSI_IOCTL_DOORLOCK 0x5380 /* lock the eject mechanism */
+#define SCSI_IOCTL_DOORUNLOCK 0x5381 /* unlock the mechanism */
+
+#define SCSI_REMOVAL_PREVENT 1
+#define SCSI_REMOVAL_ALLOW 0
+
+#ifdef __KERNEL__
+
+/*
+ * Structures used for scsi_ioctl et al.
+ */
+
+typedef struct scsi_ioctl_command {
+ unsigned int inlen;
+ unsigned int outlen;
+ unsigned char data[0];
+} Scsi_Ioctl_Command;
+
+typedef struct scsi_idlun {
+ __u32 dev_id;
+ __u32 host_unique_id;
+} Scsi_Idlun;
+
+/* Fibre Channel WWN, port_id struct */
+typedef struct scsi_fctargaddress
+{
+ __u32 host_port_id;
+ unsigned char host_wwn[8]; // include NULL term.
+} Scsi_FCTargAddress;
+
+extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
+extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
+extern int scsi_ioctl_send_command(Scsi_Device *dev,
+ Scsi_Ioctl_Command *arg);
+
+#endif
+
+#endif
+
+
diff --git a/xen/include/scsi/scsicam.h b/xen/include/scsi/scsicam.h
new file mode 100644
index 0000000000..13e9378f55
--- /dev/null
+++ b/xen/include/scsi/scsicam.h
@@ -0,0 +1,19 @@
+/*
+ * scsicam.h - SCSI CAM support functions, use for HDIO_GETGEO, etc.
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * For more information, please consult the SCSI-CAM draft.
+ */
+
+#ifndef SCSICAM_H
+#define SCSICAM_H
+#include <xeno/kdev_t.h>
+extern int scsicam_bios_param (Disk *disk, kdev_t dev, int *ip);
+extern int scsi_partsize(struct buffer_head *bh, unsigned long capacity,
+ unsigned int *cyls, unsigned int *hds, unsigned int *secs);
+#endif /* def SCSICAM_H */
diff --git a/xen/include/scsi/sg.h b/xen/include/scsi/sg.h
new file mode 100644
index 0000000000..ccb47c88bb
--- /dev/null
+++ b/xen/include/scsi/sg.h
@@ -0,0 +1,330 @@
+#ifndef _SCSI_GENERIC_H
+#define _SCSI_GENERIC_H
+
+/*
+ History:
+ Started: Aug 9 by Lawrence Foard (entropy@world.std.com), to allow user
+ process control of SCSI devices.
+ Development Sponsored by Killy Corp. NY NY
+Original driver (sg.h):
+* Copyright (C) 1992 Lawrence Foard
+Version 2 and 3 extensions to driver:
+* Copyright (C) 1998 - 2002 Douglas Gilbert
+
+ Version: 3.1.23 (20020318)
+ This version is for 2.4 series kernels.
+
+ Changes since 3.1.22 (20011208)
+ - change EACCES to EPERM when O_RDONLY is insufficient
+ - suppress newlines in host string ( /proc/scsi/sg/host_strs output)
+ - fix xfer direction, old interface, short reply_len [Travers Carter]
+ Changes since 3.1.21 (20011029)
+ - add support for SG_FLAG_MMAP_IO [permit mmap() on sg devices]
+ - update documentation pointers in this header
+ - put KERNEL_VERSION macros around code that breaks early 2.4 series
+ - fix use count for multiple queued requests on closed fd
+ - switch back to alloc_kiovec()
+ Changes since 3.1.20 (20010814)
+ - use alloc_kiovec_sz() to speed dio [set num_buffer_heads==0]
+ - changes to cope with larger scatter gather element sizes
+ - clean up some printk()s
+ - add MODULE_LICENSE("GPL") [in a 3.1.20 subversion]
+ - fix race around generic_unplug_device() [in a 3.1.20 subversion]
+ Changes since 3.1.19 (20010623)
+ - add SG_GET_ACCESS_COUNT ioctl
+ - make open() increment and close() decrement access_count
+ - only register first 256 devices, reject subsequent devices
+ Changes since 3.1.18 (20010505)
+ - fix bug that caused long wait when large buffer requested
+ - fix leak in error case of sg_new_read() [report: Eric Barton]
+ - add 'online' column to /proc/scsi/sg/devices
+ Changes since 3.1.17 (20000921)
+ - add CAP_SYS_RAWIO capability for sensitive stuff
+ - compile in dio stuff, procfs 'allow_dio' defaulted off (0)
+ - make premature close and detach more robust
+ - lun masked into commands <= SCSI_2
+ - poll() and async notification now yield POLL_HUP on detach
+ - various 3rd party tweaks tracking lk 2.4 internal changes
+
+Map of SG versions to the Linux kernels in which they appear:
+ ---------- ----------------------------------
+ original all kernels < 2.2.6
+ 2.1.40 2.2.20
+ 3.0.x optional version 3 sg driver for 2.2 series
+ 3.1.17++ 2.4.0++
+
+Major new features in SG 3.x driver (cf SG 2.x drivers)
+ - SG_IO ioctl() combines the function of write() and read()
+ - new interface (sg_io_hdr_t) but still supports old interface
+ - scatter/gather in user space, direct IO, and mmap supported
+
+ The normal action of this driver is to use the adapter (HBA) driver to DMA
+ data into kernel buffers and then use the CPU to copy the data into the
+ user space (vice versa for writes). That is called "indirect" IO due to
+ the double handling of data. There are two methods offered to remove the
+ redundant copy: 1) direct IO which uses the kernel kiobuf mechanism and
+ 2) using the mmap() system call to map the reserve buffer (this driver has
+ one reserve buffer per fd) into the user space. Both have their advantages.
+ In terms of absolute speed mmap() is faster. If speed is not a concern,
+ indirect IO should be fine. Read the documentation for more information.
+
+ ** N.B. To use direct IO 'echo 1 > /proc/scsi/sg/allow_dio' may be
+ needed. That pseudo file's content is defaulted to 0. **
+
+ Historical note: this SCSI pass-through driver has been known as "sg" for
+ a decade. In broader kernel discussions "sg" is used to refer to scatter
+ gather techniques. The context should clarify which "sg" is referred to.
+
+ Documentation
+ =============
+ A web site for the SG device driver can be found at:
+ http://www.torque.net/sg [alternatively check the MAINTAINERS file]
+ The documentation for the sg version 3 driver can be found at:
+ http://www.torque.net/sg/p/sg_v3_ho.html
+ This is a rendering from DocBook source [change the extension to "sgml"
+ or "xml"]. There are renderings in "ps", "pdf", "rtf" and "txt" (soon).
+
+ The older, version 2 documents discuss the original sg interface in detail:
+ http://www.torque.net/sg/p/scsi-generic.txt
+ http://www.torque.net/sg/p/scsi-generic_long.txt
+ A version of this document (potentially out of date) may also be found in
+ the kernel source tree, probably at:
+ /usr/src/linux/Documentation/scsi-generic.txt .
+
+ Utility and test programs are available at the sg web site. They are
+ bundled as sg_utils (for the lk 2.2 series) and sg3_utils (for the
+ lk 2.4 series).
+
+ There is a HOWTO on the Linux SCSI subsystem in the lk 2.4 series at:
+ http://www.linuxdoc.org/HOWTO/SCSI-2.4-HOWTO
+*/
+
+
+/* New interface introduced in the 3.x SG drivers follows */
+
+typedef struct sg_iovec /* same structure as used by readv() Linux system */
+{ /* call. It defines one scatter-gather element. */
+ void * iov_base; /* Starting address */
+ size_t iov_len; /* Length in bytes */
+} sg_iovec_t;
+
+
+typedef struct sg_io_hdr
+{
+ int interface_id; /* [i] 'S' for SCSI generic (required) */
+ int dxfer_direction; /* [i] data transfer direction */
+ unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */
+ unsigned char mx_sb_len; /* [i] max length to write to sbp */
+ unsigned short iovec_count; /* [i] 0 implies no scatter gather */
+ unsigned int dxfer_len; /* [i] byte count of data transfer */
+ void * dxferp; /* [i], [*io] points to data transfer memory
+ or scatter gather list */
+ unsigned char * cmdp; /* [i], [*i] points to command to perform */
+ unsigned char * sbp; /* [i], [*o] points to sense_buffer memory */
+ unsigned int timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */
+ unsigned int flags; /* [i] 0 -> default, see SG_FLAG... */
+ int pack_id; /* [i->o] unused internally (normally) */
+ void * usr_ptr; /* [i->o] unused internally */
+ unsigned char status; /* [o] scsi status */
+ unsigned char masked_status;/* [o] shifted, masked scsi status */
+ unsigned char msg_status; /* [o] messaging level data (optional) */
+ unsigned char sb_len_wr; /* [o] byte count actually written to sbp */
+ unsigned short host_status; /* [o] errors from host adapter */
+ unsigned short driver_status;/* [o] errors from software driver */
+ int resid; /* [o] dxfer_len - actual_transferred */
+ unsigned int duration; /* [o] time taken by cmd (unit: millisec) */
+ unsigned int info; /* [o] auxiliary information */
+} sg_io_hdr_t; /* 64 bytes long (on i386) */
+
+/* Use negative values to flag difference from original sg_header structure */
+#define SG_DXFER_NONE (-1) /* e.g. a SCSI Test Unit Ready command */
+#define SG_DXFER_TO_DEV (-2) /* e.g. a SCSI WRITE command */
+#define SG_DXFER_FROM_DEV (-3) /* e.g. a SCSI READ command */
+#define SG_DXFER_TO_FROM_DEV (-4) /* treated like SG_DXFER_FROM_DEV with the
+                                   additional property that during indirect
+ IO the user buffer is copied into the
+ kernel buffers before the transfer */
+#define SG_DXFER_UNKNOWN (-5) /* Unknown data direction */
+
+/* following flag values can be "or"-ed together */
+#define SG_FLAG_DIRECT_IO 1 /* default is indirect IO */
+#define SG_FLAG_LUN_INHIBIT 2 /* default is overwrite lun in SCSI */
+ /* command block (when <= SCSI_2) */
+#define SG_FLAG_MMAP_IO 4 /* request memory mapped IO */
+#define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */
+ /* user space (debug indirect IO) */
+
+/* following 'info' values are "or"-ed together */
+#define SG_INFO_OK_MASK 0x1
+#define SG_INFO_OK 0x0 /* no sense, host nor driver "noise" */
+#define SG_INFO_CHECK 0x1 /* something abnormal happened */
+
+#define SG_INFO_DIRECT_IO_MASK 0x6
+#define SG_INFO_INDIRECT_IO 0x0 /* data xfer via kernel buffers (or no xfer) */
+#define SG_INFO_DIRECT_IO 0x2 /* direct IO requested and performed */
+#define SG_INFO_MIXED_IO 0x4 /* part direct, part indirect IO */
+
+
+typedef struct sg_scsi_id { /* used by SG_GET_SCSI_ID ioctl() */
+ int host_no; /* as in "scsi<n>" where 'n' is one of 0, 1, 2 etc */
+ int channel;
+ int scsi_id; /* scsi id of target device */
+ int lun;
+ int scsi_type; /* TYPE_... defined in scsi/scsi.h */
+ short h_cmd_per_lun;/* host (adapter) maximum commands per lun */
+ short d_queue_depth;/* device (or adapter) maximum queue length */
+ int unused[2]; /* probably find a good use, set 0 for now */
+} sg_scsi_id_t; /* 32 bytes long on i386 */
+
+typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
+ char req_state; /* 0 -> not used, 1 -> written, 2 -> ready to read */
+    char orphan;    /* 0 -> normal request, 1 -> from interrupted SG_IO */
+ char sg_io_owned; /* 0 -> complete with read(), 1 -> owned by SG_IO */
+ char problem; /* 0 -> no problem detected, 1 -> error to report */
+ int pack_id; /* pack_id associated with request */
+ void * usr_ptr; /* user provided pointer (in new interface) */
+ unsigned int duration; /* millisecs elapsed since written (req_state==1)
+ or request duration (req_state==2) */
+ int unused;
+} sg_req_info_t; /* 20 bytes long on i386 */
+
+
+/* IOCTLs: Those ioctls that are relevant to the SG 3.x drivers follow.
+ [Those that only apply to the SG 2.x drivers are at the end of the file.]
+ (_GET_s yield result via 'int *' 3rd argument unless otherwise indicated) */
+
+#define SG_EMULATED_HOST 0x2203 /* true for emulated host adapter (ATAPI) */
+
+/* Used to configure SCSI command transformation layer for ATAPI devices */
+/* Only supported by the ide-scsi driver */
+#define SG_SET_TRANSFORM 0x2204 /* N.B. 3rd arg is not pointer but value: */
+ /* 3rd arg = 0 to disable transform, 1 to enable it */
+#define SG_GET_TRANSFORM 0x2205
+
+#define SG_SET_RESERVED_SIZE 0x2275 /* request a new reserved buffer size */
+#define SG_GET_RESERVED_SIZE 0x2272 /* actual size of reserved buffer */
+
+/* The following ioctl has a 'sg_scsi_id_t *' object as its 3rd argument. */
+#define SG_GET_SCSI_ID 0x2276 /* Yields fd's bus, chan, dev, lun + type */
+/* SCSI id information can also be obtained from SCSI_IOCTL_GET_IDLUN */
+
+/* Override host setting and always DMA using low memory ( <16MB on i386) */
+#define SG_SET_FORCE_LOW_DMA 0x2279 /* 0-> use adapter setting, 1-> force */
+#define SG_GET_LOW_DMA 0x227a /* 0-> use all ram for dma; 1-> low dma ram */
+
+/* When SG_SET_FORCE_PACK_ID set to 1, pack_id is input to read() which
+ tries to fetch a packet with a matching pack_id, waits, or returns EAGAIN.
+ If pack_id is -1 then read oldest waiting. When ...FORCE_PACK_ID set to 0
+ then pack_id ignored by read() and oldest readable fetched. */
+#define SG_SET_FORCE_PACK_ID 0x227b
+#define SG_GET_PACK_ID 0x227c /* Yields oldest readable pack_id (or -1) */
+
+#define SG_GET_NUM_WAITING 0x227d /* Number of commands awaiting read() */
+
+/* Yields max scatter gather tablesize allowed by current host adapter */
+#define SG_GET_SG_TABLESIZE 0x227F /* 0 implies can't do scatter gather */
+
+#define SG_GET_VERSION_NUM 0x2282 /* Example: version 2.1.34 yields 20134 */
+
+/* Returns -EBUSY if occupied. 3rd argument pointer to int (see next) */
+#define SG_SCSI_RESET 0x2284
+/* Associated values that can be given to SG_SCSI_RESET follow */
+#define SG_SCSI_RESET_NOTHING 0
+#define SG_SCSI_RESET_DEVICE 1
+#define SG_SCSI_RESET_BUS 2
+#define SG_SCSI_RESET_HOST 3
+
+/* synchronous SCSI command ioctl, (only in version 3 interface) */
+#define SG_IO 0x2285 /* similar effect as write() followed by read() */
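+
+/*
+ * SG_IO sketch: issuing a 6-byte INQUIRY through the v3 interface
+ * ('fd' is an open sg device; buffer sizes are arbitrary):
+ *
+ *     unsigned char cdb[6] = { INQUIRY, 0, 0, 0, 96, 0 };
+ *     unsigned char inq[96], sense[32];
+ *     sg_io_hdr_t io;
+ *     memset(&io, 0, sizeof(io));
+ *     io.interface_id    = 'S';
+ *     io.dxfer_direction = SG_DXFER_FROM_DEV;
+ *     io.cmd_len   = sizeof(cdb);    io.cmdp   = cdb;
+ *     io.dxfer_len = sizeof(inq);    io.dxferp = inq;
+ *     io.mx_sb_len = sizeof(sense);  io.sbp    = sense;
+ *     io.timeout   = 20000;          // milliseconds
+ *     if (ioctl(fd, SG_IO, &io) < 0 ||
+ *         (io.info & SG_INFO_OK_MASK) != SG_INFO_OK)
+ *         ...                        // inspect status/sense
+ */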
+
+#define SG_GET_REQUEST_TABLE 0x2286 /* yields table of active requests */
+
+/* How to treat EINTR during SG_IO ioctl(), only in SG 3.x series */
+#define SG_SET_KEEP_ORPHAN 0x2287 /* 1 -> hold for read(), 0 -> drop (def) */
+#define SG_GET_KEEP_ORPHAN 0x2288
+
+/* yields scsi midlevel's access_count for this SCSI device */
+#define SG_GET_ACCESS_COUNT 0x2289
+
+
+#define SG_SCATTER_SZ (8 * 4096) /* PAGE_SIZE not available to user */
+/* Largest size (in bytes) a single scatter-gather list element can have.
+ The value must be a power of 2 and <= (PAGE_SIZE * 32) [131072 bytes on
+ i386]. The minimum value is PAGE_SIZE. If scatter-gather not supported
+ by adapter then this value is the largest data block that can be
+ read/written by a single scsi command. The user can find the value of
+ PAGE_SIZE by calling getpagesize() defined in unistd.h . */
+
+#define SG_DEFAULT_RETRIES 1
+
+/* Defaults, commented if they differ from original sg driver */
+#define SG_DEF_FORCE_LOW_DMA 0 /* was 1 -> memory below 16MB on i386 */
+#define SG_DEF_FORCE_PACK_ID 0
+#define SG_DEF_KEEP_ORPHAN 0
+#define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */
+
+/* maximum outstanding requests, write() yields EDOM if exceeded */
+#define SG_MAX_QUEUE 16
+
+#define SG_BIG_BUFF SG_DEF_RESERVED_SIZE /* for backward compatibility */
+
+/* Alternate style type names, "..._t" variants preferred */
+typedef struct sg_io_hdr Sg_io_hdr;
+typedef struct sg_iovec Sg_io_vec;
+typedef struct sg_scsi_id Sg_scsi_id;
+typedef struct sg_req_info Sg_req_info;
+
+
+/* vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv */
+/* The older SG interface based on the 'sg_header' structure follows. */
+/* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
+
+#define SG_MAX_SENSE 16 /* this only applies to the sg_header interface */
+
+struct sg_header
+{
+ int pack_len; /* [o] reply_len (ie useless), ignored as input */
+ int reply_len; /* [i] max length of expected reply (inc. sg_header) */
+ int pack_id; /* [io] id number of packet (use ints >= 0) */
+ int result; /* [o] 0==ok, else (+ve) Unix errno (best ignored) */
+ unsigned int twelve_byte:1;
+ /* [i] Force 12 byte command length for group 6 & 7 commands */
+ unsigned int target_status:5; /* [o] scsi status from target */
+ unsigned int host_status:8; /* [o] host status (see "DID" codes) */
+ unsigned int driver_status:8; /* [o] driver status+suggestion */
+ unsigned int other_flags:10; /* unused */
+ unsigned char sense_buffer[SG_MAX_SENSE]; /* [o] Output in 3 cases:
+ when target_status is CHECK_CONDITION or
+ when target_status is COMMAND_TERMINATED or
+ when (driver_status & DRIVER_SENSE) is true. */
+}; /* This structure is 36 bytes long on i386 */
+
+
+/* IOCTLs: The following are not required (or ignored) when the sg_io_hdr_t
+ interface is used. They are kept for backward compatibility with
+ the original and version 2 drivers. */
+
+#define SG_SET_TIMEOUT 0x2201 /* unit: jiffies (10ms on i386) */
+#define SG_GET_TIMEOUT 0x2202 /* yield timeout as _return_ value */
+
+/* Get/set command queuing state per fd (default is SG_DEF_COMMAND_Q.
+ Each time a sg_io_hdr_t object is seen on this file descriptor, this
+ command queuing flag is set on (overriding the previous setting). */
+#define SG_GET_COMMAND_Q 0x2270 /* Yields 0 (queuing off) or 1 (on) */
+#define SG_SET_COMMAND_Q 0x2271 /* Change queuing state with 0 or 1 */
+
+/* Turn on/off error sense trace (1 and 0 respectively, default is off).
+ Try using: "# cat /proc/scsi/sg/debug" instead in the v3 driver */
+#define SG_SET_DEBUG 0x227e /* 0 -> turn off debug */
+
+#define SG_NEXT_CMD_LEN 0x2283 /* override SCSI command length with given
+ number on the next write() on this file descriptor */
+
+
+/* Defaults, commented if they differ from original sg driver */
+#define SG_DEFAULT_TIMEOUT (60*HZ) /* HZ == 'jiffies in 1 second' */
+#define SG_DEF_COMMAND_Q 0 /* command queuing is always on when
+ the new interface is used */
+#define SG_DEF_UNDERRUN_FLAG 0
+
+#endif
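For illustration, the per-fd knobs above are driven with plain ioctl() calls.
This sketch assumes an already-open /dev/sg* descriptor; the 20-second value
relies on the HZ == 100 (10 ms jiffy) i386 note next to SG_SET_TIMEOUT:

    #include <sys/ioctl.h>

    int sg_configure(int fd)
    {
        int timeout = 20 * 100;     /* 20 s in jiffies, assuming HZ == 100 */
        int queuing = 1;

        if (ioctl(fd, SG_SET_TIMEOUT, &timeout) < 0)
            return -1;
        if (ioctl(fd, SG_SET_COMMAND_Q, &queuing) < 0)
            return -1;
        return ioctl(fd, SG_GET_TIMEOUT);  /* timeout comes back as the
                                              ioctl return value */
    }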
diff --git a/xen/include/stdarg.h b/xen/include/stdarg.h
new file mode 100644
index 0000000000..9f6215d31f
--- /dev/null
+++ b/xen/include/stdarg.h
@@ -0,0 +1,138 @@
+/* Copyright (C) 1989, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/*
+ * ISO C Standard: 7.15 Variable arguments <stdarg.h>
+ */
+
+#ifndef _STDARG_H
+#ifndef _ANSI_STDARG_H_
+#ifndef __need___va_list
+#define _STDARG_H
+#define _ANSI_STDARG_H_
+#endif /* not __need___va_list */
+#undef __need___va_list
+
+/* Define __gnuc_va_list. */
+
+#ifndef __GNUC_VA_LIST
+#define __GNUC_VA_LIST
+typedef __builtin_va_list __gnuc_va_list;
+#endif
+
+/* Define the standard macros for the user,
+ if this invocation was from the user program. */
+#ifdef _STDARG_H
+
+/* Note that the type used in va_arg is supposed to match the
+ actual type **after default promotions**.
+ Thus, va_arg (..., short) is not valid. */
+
+#define va_start(v,l) __builtin_stdarg_start((v),l)
+#define va_end __builtin_va_end
+#define va_arg __builtin_va_arg
+#if !defined(__STRICT_ANSI__) || __STDC_VERSION__ + 0 >= 199900L
+#define va_copy(d,s) __builtin_va_copy((d),(s))
+#endif
+#define __va_copy(d,s) __builtin_va_copy((d),(s))
+
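A small sketch of the promotion rule the note above warns about: the second
argument to va_arg must be the type *after* default promotions, so narrow
integers are always fetched as int. The helper name is invented for
illustration.

    static int sum_ints(int count, ...)
    {
        va_list ap;
        int i, total = 0;

        va_start(ap, count);
        for (i = 0; i < count; i++)
            total += va_arg(ap, int);  /* va_arg(ap, short) would be invalid */
        va_end(ap);
        return total;                  /* sum_ints(3, 1, 2, 3) == 6 */
    }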
+
+/* Define va_list, if desired, from __gnuc_va_list. */
+/* We deliberately do not define va_list when called from
+ stdio.h, because ANSI C says that stdio.h is not supposed to define
+ va_list. stdio.h needs to have access to that data type,
+ but must not use that name. It should use the name __gnuc_va_list,
+ which is safe because it is reserved for the implementation. */
+
+#ifdef _HIDDEN_VA_LIST /* On OSF1, this means varargs.h is "half-loaded". */
+#undef _VA_LIST
+#endif
+
+#ifdef _BSD_VA_LIST
+#undef _BSD_VA_LIST
+#endif
+
+#if defined(__svr4__) || (defined(_SCO_DS) && !defined(__VA_LIST))
+/* SVR4.2 uses _VA_LIST for an internal alias for va_list,
+ so we must avoid testing it and setting it here.
+ SVR4 uses _VA_LIST as a flag in stdarg.h, but we should
+ have no conflict with that. */
+#ifndef _VA_LIST_
+#define _VA_LIST_
+#ifdef __i860__
+#ifndef _VA_LIST
+#define _VA_LIST va_list
+#endif
+#endif /* __i860__ */
+typedef __gnuc_va_list va_list;
+#ifdef _SCO_DS
+#define __VA_LIST
+#endif
+#endif /* _VA_LIST_ */
+#else /* not __svr4__ || _SCO_DS */
+
+/* The macro _VA_LIST_ is the same thing used by this file in Ultrix.
+ But on BSD NET2 we must not test or define or undef it.
+ (Note that the comments in NET 2's ansi.h
+ are incorrect for _VA_LIST_--see stdio.h!) */
+#if !defined (_VA_LIST_) || defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__) || defined(WINNT)
+/* The macro _VA_LIST_DEFINED is used in Windows NT 3.5 */
+#ifndef _VA_LIST_DEFINED
+/* The macro _VA_LIST is used in SCO Unix 3.2. */
+#ifndef _VA_LIST
+/* The macro _VA_LIST_T_H is used in the Bull dpx2 */
+#ifndef _VA_LIST_T_H
+/* The macro __va_list__ is used by BeOS. */
+#ifndef __va_list__
+typedef __gnuc_va_list va_list;
+#endif /* not __va_list__ */
+#endif /* not _VA_LIST_T_H */
+#endif /* not _VA_LIST */
+#endif /* not _VA_LIST_DEFINED */
+#if !(defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__))
+#define _VA_LIST_
+#endif
+#ifndef _VA_LIST
+#define _VA_LIST
+#endif
+#ifndef _VA_LIST_DEFINED
+#define _VA_LIST_DEFINED
+#endif
+#ifndef _VA_LIST_T_H
+#define _VA_LIST_T_H
+#endif
+#ifndef __va_list__
+#define __va_list__
+#endif
+
+#endif /* not _VA_LIST_, except on certain systems */
+
+#endif /* not __svr4__ */
+
+#endif /* _STDARG_H */
+
+#endif /* not _ANSI_STDARG_H_ */
+#endif /* not _STDARG_H */
diff --git a/xen/include/xeno/ac_timer.h b/xen/include/xeno/ac_timer.h
new file mode 100644
index 0000000000..7cf568d2fc
--- /dev/null
+++ b/xen/include/xeno/ac_timer.h
@@ -0,0 +1,65 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: ac_timer.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: Accurate timer for the Hypervisor
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _AC_TIMER_H_
+#define _AC_TIMER_H_
+
+#include <xeno/time.h> /* include notion of time */
+
+/*
+ * The Xen Hypervisor provides two types of timers:
+ *
+ * - Linux-style, jiffy-based timers for legacy code and coarse-grain
+ *   timeouts. These are defined in ./include/xeno/timer.h and implemented
+ *   in ./common/timer.c. Unlike in Linux, they are not driven by a periodic
+ *   timer interrupt but are executed "occasionally", with correspondingly
+ *   lower accuracy.
+ *
+ * - Accurate timers, defined in this file and implemented in
+ *   ./common/ac_timer.c. These are driven by a programmable timer
+ *   interrupt and are thus as accurate as the hardware allows. Where
+ *   possible we use the local APIC for this purpose; however, that fact is
+ *   hidden behind an architecture-independent layer. Accurate timers are
+ *   programmed using system time.
+ *
+ * The interface to accurate timers is very similar to that of Linux timers,
+ * except that the expires value is expressed not in jiffies but in ns from
+ * boot time. Its implementation, however, is entirely different.
+ */
+
+struct ac_timer {
+ struct list_head timer_list;
+    s_time_t         expires;       /* system-time timeout value */
+ unsigned long data;
+ void (*function)(unsigned long);
+};
+
+/* interface for "clients" */
+extern int add_ac_timer(struct ac_timer *timer);
+extern int rem_ac_timer(struct ac_timer *timer);
+extern int mod_ac_timer(struct ac_timer *timer, s_time_t new_time);
+static inline void init_ac_timer(struct ac_timer *timer)
+{
+    /* Nothing to initialise per-timer at present. */
+    //timer->next = NULL;
+}
+
+/* interface used by programmable timer, implemented hardware dependent */
+extern int reprogram_ac_timer(s_time_t timeout);
+extern void do_ac_timer(void);
+
+#endif /* _AC_TIMER_H_ */
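A sketch of the client interface, under the assumption stated in the comment
above that expiry times are nanoseconds since boot. The callback, the data
value, and the one-second delay are illustrative only.

    static void my_timeout(unsigned long data)
    {
        /* runs in timer-interrupt context when the timer fires */
    }

    static struct ac_timer my_timer;

    static void arm_timer(s_time_t now)   /* now: current system time, ns */
    {
        init_ac_timer(&my_timer);
        my_timer.expires  = now + 1000000000ULL;   /* fire in one second */
        my_timer.data     = 0;
        my_timer.function = my_timeout;
        add_ac_timer(&my_timer);
    }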
diff --git a/xen/include/xeno/blk.h b/xen/include/xeno/blk.h
new file mode 100644
index 0000000000..bc3f5548c6
--- /dev/null
+++ b/xen/include/xeno/blk.h
@@ -0,0 +1,409 @@
+#ifndef _BLK_H
+#define _BLK_H
+
+#include <xeno/blkdev.h>
+/*#include <xeno/locks.h>*/
+#include <xeno/config.h>
+#include <xeno/spinlock.h>
+
+/*
+ * Spinlock for protecting the request queue which
+ * is mucked around with in interrupts on potentially
+ * multiple CPU's..
+ */
+extern spinlock_t io_request_lock;
+
+/*
+ * Initialization functions.
+ */
+extern int isp16_init(void);
+extern int cdu31a_init(void);
+extern int acsi_init(void);
+extern int mcd_init(void);
+extern int mcdx_init(void);
+extern int sbpcd_init(void);
+extern int aztcd_init(void);
+extern int sony535_init(void);
+extern int gscd_init(void);
+extern int cm206_init(void);
+extern int optcd_init(void);
+extern int sjcd_init(void);
+extern int cdi_init(void);
+extern int hd_init(void);
+extern int ide_init(void);
+extern int xd_init(void);
+extern int mfm_init(void);
+extern int loop_init(void);
+extern int md_init(void);
+extern int ap_init(void);
+extern int ddv_init(void);
+extern int z2_init(void);
+extern int swim3_init(void);
+extern int swimiop_init(void);
+extern int amiga_floppy_init(void);
+extern int atari_floppy_init(void);
+extern int ez_init(void);
+extern int bpcd_init(void);
+extern int ps2esdi_init(void);
+extern int jsfd_init(void);
+extern int viodasd_init(void);
+extern int viocd_init(void);
+
+#if defined(CONFIG_ARCH_S390)
+extern int dasd_init(void);
+extern int xpram_init(void);
+extern int tapeblock_init(void);
+#endif /* CONFIG_ARCH_S390 */
+
+extern void set_device_ro(kdev_t dev,int flag);
+#if 0
+void add_blkdev_randomness(int major);
+#else
+#define add_blkdev_randomness(_major) ((void)0)
+#endif
+
+extern int floppy_init(void);
+extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
+extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
+extern int rd_image_start; /* starting block # of image */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
+
+extern unsigned long initrd_start,initrd_end;
+extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */
+void initrd_init(void);
+
+#endif
+
+
+/*
+ * end_request() and friends. Must be called with the request queue spinlock
+ * acquired. All functions called within end_request() _must_be_ atomic.
+ *
+ * Several drivers define their own end_request and call
+ * end_that_request_first() and end_that_request_last()
+ * for parts of the original function. This prevents
+ * code duplication in drivers.
+ */
+
+static inline void blkdev_dequeue_request(struct request * req)
+{
+ list_del(&req->queue);
+}
+
+int end_that_request_first(struct request *req, int uptodate, char *name);
+void end_that_request_last(struct request *req);
+
+#if defined(MAJOR_NR) || defined(IDE_DRIVER)
+
+#undef DEVICE_ON
+#undef DEVICE_OFF
+
+/*
+ * Add entries as needed.
+ */
+
+#ifdef IDE_DRIVER
+
+#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS)
+#define DEVICE_NAME "ide"
+
+#elif (MAJOR_NR == RAMDISK_MAJOR)
+
+/* ram disk */
+#define DEVICE_NAME "ramdisk"
+#define DEVICE_NR(device) (MINOR(device))
+#define DEVICE_NO_RANDOM
+
+#elif (MAJOR_NR == Z2RAM_MAJOR)
+
+/* Zorro II Ram */
+#define DEVICE_NAME "Z2RAM"
+#define DEVICE_REQUEST do_z2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == FLOPPY_MAJOR)
+
+static void floppy_off(unsigned int nr);
+
+#define DEVICE_NAME "floppy"
+#define DEVICE_INTR do_floppy
+#define DEVICE_REQUEST do_fd_request
+#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 ))
+#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device))
+
+#elif (MAJOR_NR == HD_MAJOR)
+
+/* Hard disk: timeout is 6 seconds. */
+#define DEVICE_NAME "hard disk"
+#define DEVICE_INTR do_hd
+#define TIMEOUT_VALUE (6*HZ)
+#define DEVICE_REQUEST do_hd_request
+#define DEVICE_NR(device) (MINOR(device)>>6)
+
+#elif (SCSI_DISK_MAJOR(MAJOR_NR))
+
+#define DEVICE_NAME "scsidisk"
+#define TIMEOUT_VALUE (2*HZ)
+#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4))
+
+/* Kludge to use the same number for both char and block major numbers */
+#elif (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER)
+
+#define DEVICE_NAME "Multiple devices driver"
+#define DEVICE_REQUEST do_md_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SCSI_TAPE_MAJOR)
+
+#define DEVICE_NAME "scsitape"
+#define DEVICE_INTR do_st
+#define DEVICE_NR(device) (MINOR(device) & 0x7f)
+
+#elif (MAJOR_NR == OSST_MAJOR)
+
+#define DEVICE_NAME "onstream"
+#define DEVICE_INTR do_osst
+#define DEVICE_NR(device) (MINOR(device) & 0x7f)
+#define DEVICE_ON(device)
+#define DEVICE_OFF(device)
+
+#elif (MAJOR_NR == SCSI_CDROM_MAJOR)
+
+#define DEVICE_NAME "CD-ROM"
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == XT_DISK_MAJOR)
+
+#define DEVICE_NAME "xt disk"
+#define DEVICE_REQUEST do_xd_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == PS2ESDI_MAJOR)
+
+#define DEVICE_NAME "PS/2 ESDI"
+#define DEVICE_REQUEST do_ps2esdi_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == CDU31A_CDROM_MAJOR)
+
+#define DEVICE_NAME "CDU31A"
+#define DEVICE_REQUEST do_cdu31a_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE))
+
+#define DEVICE_NAME "ACSI"
+#define DEVICE_INTR do_acsi
+#define DEVICE_REQUEST do_acsi_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcd */
+#define DEVICE_REQUEST do_mcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcdx */
+#define DEVICE_REQUEST do_mcdx_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #1"
+#define DEVICE_REQUEST do_sbpcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #2"
+#define DEVICE_REQUEST do_sbpcd2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #3"
+#define DEVICE_REQUEST do_sbpcd3_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #4"
+#define DEVICE_REQUEST do_sbpcd4_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == AZTECH_CDROM_MAJOR)
+
+#define DEVICE_NAME "Aztech CD-ROM"
+#define DEVICE_REQUEST do_aztcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CDU535_CDROM_MAJOR)
+
+#define DEVICE_NAME "SONY-CDU535"
+#define DEVICE_INTR do_cdu535
+#define DEVICE_REQUEST do_cdu535_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR)
+
+#define DEVICE_NAME "Goldstar R420"
+#define DEVICE_REQUEST do_gscd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CM206_CDROM_MAJOR)
+#define DEVICE_NAME "Philips/LMS CD-ROM cm206"
+#define DEVICE_REQUEST do_cm206_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == OPTICS_CDROM_MAJOR)
+
+#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM"
+#define DEVICE_REQUEST do_optcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SANYO_CDROM_MAJOR)
+
+#define DEVICE_NAME "Sanyo H94A CD-ROM"
+#define DEVICE_REQUEST do_sjcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == APBLOCK_MAJOR)
+
+#define DEVICE_NAME "apblock"
+#define DEVICE_REQUEST ap_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DDV_MAJOR)
+
+#define DEVICE_NAME "ddv"
+#define DEVICE_REQUEST ddv_request
+#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS)
+
+#elif (MAJOR_NR == MFM_ACORN_MAJOR)
+
+#define DEVICE_NAME "mfm disk"
+#define DEVICE_INTR do_mfm
+#define DEVICE_REQUEST do_mfm_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == NBD_MAJOR)
+
+#define DEVICE_NAME "nbd"
+#define DEVICE_REQUEST do_nbd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MDISK_MAJOR)
+
+#define DEVICE_NAME "mdisk"
+#define DEVICE_REQUEST mdisk_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DASD_MAJOR)
+
+#define DEVICE_NAME "dasd"
+#define DEVICE_REQUEST do_dasd_request
+#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS)
+
+#elif (MAJOR_NR == I2O_MAJOR)
+
+#define DEVICE_NAME "I2O block"
+#define DEVICE_REQUEST i2ob_request
+#define DEVICE_NR(device) (MINOR(device)>>4)
+
+#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR)
+
+#define DEVICE_NAME "ida"
+#define TIMEOUT_VALUE (25*HZ)
+#define DEVICE_REQUEST do_ida_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#endif /* MAJOR_NR == whatever */
+
+/* provide DEVICE_xxx defaults, if not explicitly defined
+ * above in the MAJOR_NR==xxx if-elif tree */
+#ifndef DEVICE_ON
+#define DEVICE_ON(device) do {} while (0)
+#endif
+#ifndef DEVICE_OFF
+#define DEVICE_OFF(device) do {} while (0)
+#endif
+
+#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR)
+#if !defined(IDE_DRIVER)
+
+#ifndef CURRENT
+#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+#ifndef QUEUE_EMPTY
+#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+
+#ifndef DEVICE_NAME
+#define DEVICE_NAME "unknown"
+#endif
+
+#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev)
+
+#ifdef DEVICE_INTR
+static void (*DEVICE_INTR)(void) = NULL;
+#endif
+
+#define SET_INTR(x) (DEVICE_INTR = (x))
+
+#ifdef DEVICE_REQUEST
+static void (DEVICE_REQUEST)(request_queue_t *);
+#endif
+
+#ifdef DEVICE_INTR
+#define CLEAR_INTR SET_INTR(NULL)
+#else
+#define CLEAR_INTR
+#endif
+
+#define INIT_REQUEST \
+ if (QUEUE_EMPTY) {\
+ CLEAR_INTR; \
+ return; \
+ } \
+ if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \
+ panic(DEVICE_NAME ": request list destroyed"); \
+ if (CURRENT->bh) { \
+ if (!buffer_locked(CURRENT->bh)) \
+ panic(DEVICE_NAME ": block not locked"); \
+ }
+
+#endif /* !defined(IDE_DRIVER) */
+
+
+#ifndef LOCAL_END_REQUEST /* If we have our own end_request, we do not want to include this mess */
+
+#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR)
+
+static inline void end_request(int uptodate) {
+ struct request *req = CURRENT;
+
+ if (end_that_request_first(req, uptodate, DEVICE_NAME))
+ return;
+
+#ifndef DEVICE_NO_RANDOM
+ add_blkdev_randomness(MAJOR(req->rq_dev));
+#endif
+ DEVICE_OFF(req->rq_dev);
+ blkdev_dequeue_request(req);
+ end_that_request_last(req);
+}
+
+#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */
+#endif /* LOCAL_END_REQUEST */
+
+#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */
+#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */
+
+#endif /* _BLK_H */
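This header is designed to be included after the driver has defined MAJOR_NR
and its DEVICE_* macros, which is what makes CURRENT, INIT_REQUEST and
end_request() expand correctly. A hypothetical driver skeleton (the major
number and names are invented for illustration):

    #define MAJOR_NR        XYZ_MAJOR       /* hypothetical major number */
    #define DEVICE_NAME     "xyz"
    #define DEVICE_REQUEST  do_xyz_request
    #define DEVICE_NR(dev)  (MINOR(dev))
    #include <xeno/blk.h>

    static void do_xyz_request(request_queue_t *q)
    {
        while (1) {
            INIT_REQUEST;               /* returns when the queue is empty */
            /* ... transfer CURRENT->nr_sectors from/to CURRENT->buffer ... */
            end_request(1);             /* 1 == success */
        }
    }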
diff --git a/xen/include/xeno/blkdev.h b/xen/include/xeno/blkdev.h
new file mode 100644
index 0000000000..a2cd390517
--- /dev/null
+++ b/xen/include/xeno/blkdev.h
@@ -0,0 +1,371 @@
+#ifndef _LINUX_BLKDEV_H
+#define _LINUX_BLKDEV_H
+
+#include <xeno/lib.h>
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <xeno/list.h>
+#include <xeno/kdev_t.h>
+#include <xeno/sched.h>
+
+/* Some defines from fs.h that may actually be useful to the blkdev layer. */
+#define READ 0
+#define WRITE 1
+#define READA 2
+#define BLOCK_SIZE_BITS 10
+#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+extern void init_blkdev_info(struct task_struct *);
+extern void destroy_blkdev_info(struct task_struct *);
+
+extern int unregister_blkdev(unsigned int, const char *);
+extern int invalidate_device(kdev_t, int);
+extern int check_disk_change(kdev_t);
+struct block_device;
+extern void invalidate_bdev(struct block_device *, int);
+
+/*
+ * Metainformation regarding block devices is kept in inode and file
+ * structures. We don't actually want those so we define just as much
+ * as we need right here.
+ */
+struct file {
+};
+struct inode {
+ kdev_t i_rdev; /* for _open and _release, specifies the blkdev */
+ struct block_device *i_bdev;
+};
+
+struct block_device_operations {
+ int (*open) (struct inode *, struct file *);
+ int (*release) (struct inode *, struct file *);
+ int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
+ int (*check_media_change) (kdev_t);
+ int (*revalidate) (kdev_t);
+};
+
+
+enum bh_state_bits {
+ BH_Uptodate, /* 1 if the buffer contains valid data */
+ BH_Dirty, /* 1 if the buffer is dirty */
+ BH_Lock, /* 1 if the buffer is locked */
+ BH_Req, /* 0 if the buffer has been invalidated */
+ BH_Mapped, /* 1 if the buffer has a disk mapping */
+ BH_New, /* 1 if the buffer is new and not yet written out */
+ BH_Async, /* 1 if the buffer is under end_buffer_io_async I/O */
+ BH_Wait_IO, /* 1 if we should write out this buffer */
+ BH_Launder, /* 1 if we can throttle on this buffer */
+ BH_JBD, /* 1 if it has an attached journal_head */
+ BH_Read, /* 1 if request is a read from disc */
+ BH_Write /* 1 if request is a write to disc */
+};
+
+struct buffer_head {
+ unsigned long b_blocknr; /* block number */
+ unsigned short b_size; /* block size */
+    unsigned short b_list;          /* List this buffer appears on */
+ kdev_t b_dev; /* device (B_FREE = free) */
+
+ atomic_t b_count; /* users using this block */
+ kdev_t b_rdev; /* Real device */
+ unsigned long b_state; /* buffer state bitmap (see above) */
+
+ struct buffer_head *b_reqnext; /* request queue */
+
+ char * b_data; /* pointer to data block */
+ struct pfn_info *b_page; /* the page this bh is mapped to */
+ void (*b_end_io)(struct buffer_head *bh, int uptodate);
+
+ unsigned long b_rsector; /* Real buffer location on disk */
+
+ /* Both used by b_end_io function in xen_block.c */
+ void *b_xen_domain;
+ void *b_xen_id;
+};
+
+typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
+void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
+
+#define __buffer_state(bh, state) (((bh)->b_state & (1UL << BH_##state)) != 0)
+
+#define buffer_uptodate(bh) __buffer_state(bh,Uptodate)
+#define buffer_dirty(bh) __buffer_state(bh,Dirty)
+#define buffer_locked(bh) __buffer_state(bh,Lock)
+#define buffer_req(bh) __buffer_state(bh,Req)
+#define buffer_mapped(bh) __buffer_state(bh,Mapped)
+#define buffer_new(bh) __buffer_state(bh,New)
+#define buffer_async(bh) __buffer_state(bh,Async)
+#define buffer_launder(bh) __buffer_state(bh,Launder)
+
+#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
+
+extern void set_bh_page(struct buffer_head *bh, struct pfn_info *page, unsigned long offset);
+
+#define touch_buffer(bh) mark_page_accessed(bh->b_page)
+
+#define atomic_set_buffer_clean(bh) test_and_clear_bit(BH_Dirty, &(bh)->b_state)
+
+static inline void __mark_buffer_clean(struct buffer_head *bh)
+{
+ panic("__mark_buffer_clean");
+}
+
+static inline void mark_buffer_clean(struct buffer_head * bh)
+{
+ if (atomic_set_buffer_clean(bh))
+ __mark_buffer_clean(bh);
+}
+
+static inline void buffer_IO_error(struct buffer_head * bh)
+{
+ mark_buffer_clean(bh);
+ /* b_end_io has to clear the BH_Uptodate bitflag in the error case! */
+ bh->b_end_io(bh, 0);
+}
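A sketch of a completion callback of the bh_end_io_t shape declared below;
per the comment above, the error path must leave BH_Uptodate clear. The
driver name is invented, and the bit operations come from asm/bitops.h,
which this header already includes.

    static void xyz_end_io(struct buffer_head *bh, int uptodate)
    {
        if (uptodate)
            set_bit(BH_Uptodate, &bh->b_state);
        else
            clear_bit(BH_Uptodate, &bh->b_state);  /* required on error */
        clear_bit(BH_Lock, &bh->b_state);          /* buffer no longer busy */
        /* ... wake any waiter or recycle the buffer_head ... */
    }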
+
+/**** XXX END OF BUFFER_HEAD STUFF XXXX ****/
+
+#include <xeno/major.h>
+#include <xeno/sched.h>
+#include <xeno/genhd.h>
+#include <xeno/tqueue.h>
+#include <xeno/list.h>
+
+struct request_queue;
+typedef struct request_queue request_queue_t;
+struct elevator_s;
+typedef struct elevator_s elevator_t;
+
+/*
+ * Ok, this is an expanded form so that we can use the same
+ * request for paging requests.
+ */
+struct request {
+ struct list_head queue;
+ int elevator_sequence;
+
+ volatile int rq_status; /* should split this into a few status bits */
+#define RQ_INACTIVE (-1)
+#define RQ_ACTIVE 1
+#define RQ_SCSI_BUSY 0xffff
+#define RQ_SCSI_DONE 0xfffe
+#define RQ_SCSI_DISCONNECTING 0xffe0
+
+ kdev_t rq_dev;
+ int cmd; /* READ or WRITE */
+ int errors;
+ unsigned long start_time;
+ unsigned long sector;
+ unsigned long nr_sectors;
+ unsigned long hard_sector, hard_nr_sectors;
+ unsigned int nr_segments;
+ unsigned int nr_hw_segments;
+ unsigned long current_nr_sectors;
+ void * special;
+ char * buffer;
+ struct completion * waiting;
+ struct buffer_head * bh;
+ struct buffer_head * bhtail;
+ request_queue_t *q;
+};
+
+#include <xeno/elevator.h>
+
+typedef int (merge_request_fn) (request_queue_t *q,
+ struct request *req,
+ struct buffer_head *bh,
+ int);
+typedef int (merge_requests_fn) (request_queue_t *q,
+ struct request *req,
+ struct request *req2,
+ int);
+typedef void (request_fn_proc) (request_queue_t *q);
+typedef request_queue_t * (queue_proc) (kdev_t dev);
+typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh);
+typedef void (plug_device_fn) (request_queue_t *q, kdev_t device);
+typedef void (unplug_device_fn) (void *q);
+
+/*
+ * Default nr free requests per queue, ll_rw_blk will scale it down
+ * according to available RAM at init time
+ */
+#define QUEUE_NR_REQUESTS 8192
+
+struct request_list {
+ unsigned int count;
+ struct list_head free;
+};
+
+struct request_queue
+{
+ /*
+ * the queue request freelist, one for reads and one for writes
+ */
+ struct request_list rq[2];
+
+ /*
+ * The total number of requests on each queue
+ */
+ int nr_requests;
+
+ /*
+ * Batching threshold for sleep/wakeup decisions
+ */
+ int batch_requests;
+
+ /*
+ * Together with queue_head for cacheline sharing
+ */
+ struct list_head queue_head;
+ elevator_t elevator;
+
+ request_fn_proc * request_fn;
+ merge_request_fn * back_merge_fn;
+ merge_request_fn * front_merge_fn;
+ merge_requests_fn * merge_requests_fn;
+ make_request_fn * make_request_fn;
+ plug_device_fn * plug_device_fn;
+ /*
+ * The queue owner gets to use this for whatever they like.
+ * ll_rw_blk doesn't touch it.
+ */
+ void * queuedata;
+
+ /*
+ * This is used to remove the plug when tq_disk runs.
+ */
+ struct tq_struct plug_tq;
+
+ /*
+ * Boolean that indicates whether this queue is plugged or not.
+ */
+ char plugged;
+
+ /*
+ * Boolean that indicates whether current_request is active or
+ * not.
+ */
+ char head_active;
+
+ /*
+ * Is meant to protect the queue in the future instead of
+ * io_request_lock
+ */
+ spinlock_t queue_lock;
+
+#if 0
+ /*
+ * Tasks wait here for free read and write requests
+ */
+ wait_queue_head_t wait_for_requests[2];
+#endif
+};
+
+#define bh_phys(bh) (page_to_phys((bh)->b_page) + bh_offset((bh)))
+
+struct blk_dev_struct {
+ /*
+ * queue_proc has to be atomic
+ */
+ request_queue_t request_queue;
+ queue_proc *queue;
+ void *data;
+};
+
+struct sec_size {
+ unsigned block_size;
+ unsigned block_size_bits;
+};
+
+/*
+ * Used to indicate the default queue for drivers that don't bother
+ * to implement multiple queues. We have this access macro here
+ * so as to eliminate the need for each and every block device
+ * driver to know about the internal structure of blk_dev[].
+ */
+#define BLK_DEFAULT_QUEUE(_MAJOR) &blk_dev[_MAJOR].request_queue
+
+extern struct sec_size * blk_sec[MAX_BLKDEV];
+extern struct blk_dev_struct blk_dev[MAX_BLKDEV];
+extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size);
+extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size);
+extern void generic_make_request(int rw, struct buffer_head * bh);
+extern inline request_queue_t *blk_get_queue(kdev_t dev);
+extern void blkdev_release_request(struct request *);
+
+/*
+ * Access functions for manipulating queue properties
+ */
+extern int blk_grow_request_list(request_queue_t *q, int nr_requests);
+extern void blk_init_queue(request_queue_t *, request_fn_proc *);
+extern void blk_cleanup_queue(request_queue_t *);
+extern void blk_queue_headactive(request_queue_t *, int);
+extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
+extern void generic_unplug_device(void *);
+
+extern int * blk_size[MAX_BLKDEV];
+
+extern int * blksize_size[MAX_BLKDEV];
+
+extern int * hardsect_size[MAX_BLKDEV];
+
+/*extern int * max_readahead[MAX_BLKDEV];*/
+
+extern int * max_sectors[MAX_BLKDEV];
+
+extern int * max_segments[MAX_BLKDEV];
+
+#define MAX_SEGMENTS 128
+#define MAX_SECTORS 255
+
+#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
+
+#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue)
+#define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next)
+#define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev)
+#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next)
+#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev)
+
+extern void drive_stat_acct (kdev_t dev, int rw,
+ unsigned long nr_sectors, int new_io);
+
+static inline int get_hardsect_size(kdev_t dev)
+{
+ int retval = 512;
+ int major = MAJOR(dev);
+
+ if (hardsect_size[major]) {
+ int minor = MINOR(dev);
+ if (hardsect_size[major][minor])
+ retval = hardsect_size[major][minor];
+ }
+ return retval;
+}
+
+#define blk_finished_io(nsects) do { } while (0)
+#define blk_started_io(nsects) do { } while (0)
+
+static inline unsigned int blksize_bits(unsigned int size)
+{
+ unsigned int bits = 8;
+ do {
+ bits++;
+ size >>= 1;
+ } while (size > 256);
+ return bits;
+}
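Worked values, since the loop is easy to misread: the function returns log2
of the soft block size.

    unsigned int b9  = blksize_bits(512);    /* == 9  */
    unsigned int b10 = blksize_bits(1024);   /* == 10 */
    unsigned int b12 = blksize_bits(4096);   /* == 12 */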
+
+static inline unsigned int block_size(kdev_t dev)
+{
+ int retval = BLOCK_SIZE;
+ int major = MAJOR(dev);
+
+ if (blksize_size[major]) {
+ int minor = MINOR(dev);
+ if (blksize_size[major][minor])
+ retval = blksize_size[major][minor];
+ }
+ return retval;
+}
+
+#endif
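A sketch of how a driver ties into these declarations at init time.
XYZ_MAJOR and the request function are invented for illustration, and the
request-completion helpers used inside it come from blk.h above.

    static void do_xyz_request(request_queue_t *q)
    {
        while (!list_empty(&q->queue_head)) {
            struct request *req = blkdev_entry_next_request(&q->queue_head);
            /* ... service req->nr_sectors starting at req->sector ... */
            blkdev_dequeue_request(req);
            end_that_request_last(req);
        }
    }

    static int xyz_init(void)
    {
        blk_init_queue(BLK_DEFAULT_QUEUE(XYZ_MAJOR), do_xyz_request);
        blk_queue_headactive(BLK_DEFAULT_QUEUE(XYZ_MAJOR), 0);
        return 0;
    }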
diff --git a/xen/include/xeno/blkpg.h b/xen/include/xeno/blkpg.h
new file mode 100644
index 0000000000..f4240abaf1
--- /dev/null
+++ b/xen/include/xeno/blkpg.h
@@ -0,0 +1,64 @@
+#ifndef _LINUX_BLKPG_H
+#define _LINUX_BLKPG_H
+
+/*
+ * Partition table and disk geometry handling
+ *
+ * A single ioctl with lots of subfunctions:
+ *
+ * Device number stuff:
+ * get_whole_disk() (given the device number of a partition,
+ * find the device number of the encompassing disk)
+ * get_all_partitions() (given the device number of a disk, return the
+ * device numbers of all its known partitions)
+ *
+ * Partition stuff:
+ * add_partition()
+ * delete_partition()
+ * test_partition_in_use() (also for test_disk_in_use)
+ *
+ * Geometry stuff:
+ * get_geometry()
+ * set_geometry()
+ * get_bios_drivedata()
+ *
+ * For today, only the partition stuff - aeb, 990515
+ */
+#include <xeno/ioctl.h>
+
+#define BLKPG _IO(0x12,105)
+
+/* The argument structure */
+struct blkpg_ioctl_arg {
+ int op;
+ int flags;
+ int datalen;
+ void *data;
+};
+
+/* The subfunctions (for the op field) */
+#define BLKPG_ADD_PARTITION 1
+#define BLKPG_DEL_PARTITION 2
+
+/* Sizes of name fields. Unused at present. */
+#define BLKPG_DEVNAMELTH 64
+#define BLKPG_VOLNAMELTH 64
+
+/* The data structure for ADD_PARTITION and DEL_PARTITION */
+struct blkpg_partition {
+ long long start; /* starting offset in bytes */
+ long long length; /* length in bytes */
+ int pno; /* partition number */
+ char devname[BLKPG_DEVNAMELTH]; /* partition name, like sda5 or c0d1p2,
+ to be used in kernel messages */
+ char volname[BLKPG_VOLNAMELTH]; /* volume label */
+};
+
+#ifdef __KERNEL__
+
+extern char * partition_name(kdev_t dev);
+extern int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_BLKPG_H */
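Driving BLKPG from user space is a matter of filling the two structures
above. In this sketch the byte offsets and partition number are invented,
and fd is assumed to refer to the whole-disk device node.

    #include <string.h>
    #include <sys/ioctl.h>

    int add_partition(int fd)
    {
        struct blkpg_partition part;
        struct blkpg_ioctl_arg arg;

        memset(&part, 0, sizeof(part));
        part.start  = 1024LL * 1024;        /* byte offset: skip first MiB */
        part.length = 64LL * 1024 * 1024;   /* 64 MiB partition */
        part.pno    = 1;

        arg.op      = BLKPG_ADD_PARTITION;
        arg.flags   = 0;
        arg.datalen = sizeof(part);
        arg.data    = &part;
        return ioctl(fd, BLKPG, &arg);
    }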
diff --git a/xen/include/xeno/block.h b/xen/include/xeno/block.h
new file mode 100644
index 0000000000..bb80d0987b
--- /dev/null
+++ b/xen/include/xeno/block.h
@@ -0,0 +1,11 @@
+/* block.h
+ *
+ * this is the hypervisor end of the block io code.
+ */
+
+#include <hypervisor-ifs/block.h>
+
+/* vif prototypes */
+blk_ring_t *create_block_ring(int domain);
+void destroy_block_ring(struct task_struct *p);
+
diff --git a/xen/include/xeno/brlock.h b/xen/include/xeno/brlock.h
new file mode 100644
index 0000000000..208c457338
--- /dev/null
+++ b/xen/include/xeno/brlock.h
@@ -0,0 +1,220 @@
+#ifndef __LINUX_BRLOCK_H
+#define __LINUX_BRLOCK_H
+
+/*
+ * 'Big Reader' read-write spinlocks.
+ *
+ * super-fast read/write locks, with write-side penalty. The point
+ * is to have a per-CPU read/write lock. Readers lock their CPU-local
+ * readlock, writers must lock all locks to get write access. These
+ * CPU-read-write locks are semantically identical to normal rwlocks.
+ * Memory usage is higher as well. (NR_CPUS*L1_CACHE_BYTES bytes)
+ *
+ * The most important feature is that these spinlocks do not cause
+ * cacheline ping-pong in the 'most readonly data' case.
+ *
+ * Copyright 2000, Ingo Molnar <mingo@redhat.com>
+ *
+ * Registry idea and naming [ crucial! :-) ] by:
+ *
+ * David S. Miller <davem@redhat.com>
+ *
+ * David has an implementation that doesn't use atomic operations in
+ * the read branch via memory ordering tricks - I guess we need to
+ * split this up into a per-arch thing? The atomicity issue is a
+ * secondary item in profiles, at least on x86 platforms.
+ *
+ * The atomic op version overhead is indeed a big deal on
+ * load-locked/store-conditional cpus (ALPHA/MIPS/PPC) and
+ * compare-and-swap cpus (Sparc64). So we control which
+ * implementation to use with a __BRLOCK_USE_ATOMICS define. -DaveM
+ */
+
+/* Register bigreader lock indices here. */
+enum brlock_indices {
+ BR_GLOBALIRQ_LOCK,
+ BR_NETPROTO_LOCK,
+
+ __BR_END
+};
+
+#include <linux/config.h>
+
+#ifdef CONFIG_SMP
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+
+#if defined(__i386__) || defined(__ia64__) || defined(__x86_64__)
+#define __BRLOCK_USE_ATOMICS
+#else
+#undef __BRLOCK_USE_ATOMICS
+#endif
+
+#ifdef __BRLOCK_USE_ATOMICS
+typedef rwlock_t brlock_read_lock_t;
+#else
+typedef unsigned int brlock_read_lock_t;
+#endif
+
+/*
+ * align last allocated index to the next cacheline:
+ */
+#define __BR_IDX_MAX \
+ (((sizeof(brlock_read_lock_t)*__BR_END + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) / sizeof(brlock_read_lock_t))
+
+extern brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX];
+
+#ifndef __BRLOCK_USE_ATOMICS
+struct br_wrlock {
+ spinlock_t lock;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct br_wrlock __br_write_locks[__BR_IDX_MAX];
+#endif
+
+extern void __br_lock_usage_bug (void);
+
+#ifdef __BRLOCK_USE_ATOMICS
+
+static inline void br_read_lock (enum brlock_indices idx)
+{
+ /*
+ * This causes a link-time bug message if an
+ * invalid index is used:
+ */
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+
+ read_lock(&__brlock_array[smp_processor_id()][idx]);
+}
+
+static inline void br_read_unlock (enum brlock_indices idx)
+{
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+
+ read_unlock(&__brlock_array[smp_processor_id()][idx]);
+}
+
+#else /* ! __BRLOCK_USE_ATOMICS */
+static inline void br_read_lock (enum brlock_indices idx)
+{
+ unsigned int *ctr;
+ spinlock_t *lock;
+
+ /*
+ * This causes a link-time bug message if an
+ * invalid index is used:
+ */
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+
+ ctr = &__brlock_array[smp_processor_id()][idx];
+ lock = &__br_write_locks[idx].lock;
+again:
+ (*ctr)++;
+ mb();
+ if (spin_is_locked(lock)) {
+ (*ctr)--;
+ wmb(); /*
+ * The release of the ctr must become visible
+ * to the other cpus eventually thus wmb(),
+ * we don't care if spin_is_locked is reordered
+ * before the releasing of the ctr.
+                * However IMHO this wmb() is superfluous even in theory.
+                * It would not be superfluous only if on the
+ * other CPUs doing a ldl_l instead of an ldl
+ * would make a difference and I don't think this is
+ * the case.
+ * I'd like to clarify this issue further
+ * but for now this is a slow path so adding the
+ * wmb() will keep us on the safe side.
+ */
+ while (spin_is_locked(lock))
+ barrier();
+ goto again;
+ }
+}
+
+static inline void br_read_unlock (enum brlock_indices idx)
+{
+ unsigned int *ctr;
+
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+
+ ctr = &__brlock_array[smp_processor_id()][idx];
+
+ wmb();
+ (*ctr)--;
+}
+#endif /* __BRLOCK_USE_ATOMICS */
+
+/* write path not inlined - it's rare and larger */
+
+extern void FASTCALL(__br_write_lock (enum brlock_indices idx));
+extern void FASTCALL(__br_write_unlock (enum brlock_indices idx));
+
+static inline void br_write_lock (enum brlock_indices idx)
+{
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+ __br_write_lock(idx);
+}
+
+static inline void br_write_unlock (enum brlock_indices idx)
+{
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+ __br_write_unlock(idx);
+}
+
+#else
+# define br_read_lock(idx) ((void)(idx))
+# define br_read_unlock(idx) ((void)(idx))
+# define br_write_lock(idx) ((void)(idx))
+# define br_write_unlock(idx) ((void)(idx))
+#endif
+
+/*
+ * Now enumerate all of the possible sw/hw IRQ protected
+ * versions of the interfaces.
+ */
+#define br_read_lock_irqsave(idx, flags) \
+ do { local_irq_save(flags); br_read_lock(idx); } while (0)
+
+#define br_read_lock_irq(idx) \
+ do { local_irq_disable(); br_read_lock(idx); } while (0)
+
+#define br_read_lock_bh(idx) \
+ do { local_bh_disable(); br_read_lock(idx); } while (0)
+
+#define br_write_lock_irqsave(idx, flags) \
+ do { local_irq_save(flags); br_write_lock(idx); } while (0)
+
+#define br_write_lock_irq(idx) \
+ do { local_irq_disable(); br_write_lock(idx); } while (0)
+
+#define br_write_lock_bh(idx) \
+ do { local_bh_disable(); br_write_lock(idx); } while (0)
+
+#define br_read_unlock_irqrestore(idx, flags) \
+	do { br_read_unlock(idx); local_irq_restore(flags); } while (0)
+
+#define br_read_unlock_irq(idx) \
+ do { br_read_unlock(idx); local_irq_enable(); } while (0)
+
+#define br_read_unlock_bh(idx) \
+ do { br_read_unlock(idx); local_bh_enable(); } while (0)
+
+#define br_write_unlock_irqrestore(idx, flags) \
+	do { br_write_unlock(idx); local_irq_restore(flags); } while (0)
+
+#define br_write_unlock_irq(idx) \
+ do { br_write_unlock(idx); local_irq_enable(); } while (0)
+
+#define br_write_unlock_bh(idx) \
+ do { br_write_unlock(idx); local_bh_enable(); } while (0)
+
+#endif /* __LINUX_BRLOCK_H */
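A sketch of the intended usage pattern: readers take only their own CPU's
slot, so the common read path stays cache-local, while the rare writer pays
for every CPU. BR_NETPROTO_LOCK is one of the indices registered above; the
function names are illustrative.

    static void reader_path(void)
    {
        br_read_lock(BR_NETPROTO_LOCK);
        /* ... look at mostly-read-only protocol tables ... */
        br_read_unlock(BR_NETPROTO_LOCK);
    }

    static void writer_path(void)
    {
        br_write_lock_bh(BR_NETPROTO_LOCK);    /* also masks bottom halves */
        /* ... modify the tables ... */
        br_write_unlock_bh(BR_NETPROTO_LOCK);
    }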
diff --git a/xen/include/xeno/byteorder/big_endian.h b/xen/include/xeno/byteorder/big_endian.h
new file mode 100644
index 0000000000..b84efd74c9
--- /dev/null
+++ b/xen/include/xeno/byteorder/big_endian.h
@@ -0,0 +1,68 @@
+#ifndef _LINUX_BYTEORDER_BIG_ENDIAN_H
+#define _LINUX_BYTEORDER_BIG_ENDIAN_H
+
+#ifndef __BIG_ENDIAN
+#define __BIG_ENDIAN 4321
+#endif
+#ifndef __BIG_ENDIAN_BITFIELD
+#define __BIG_ENDIAN_BITFIELD
+#endif
+
+#include <linux/byteorder/swab.h>
+
+#define __constant_htonl(x) ((__u32)(x))
+#define __constant_ntohl(x) ((__u32)(x))
+#define __constant_htons(x) ((__u16)(x))
+#define __constant_ntohs(x) ((__u16)(x))
+#define __constant_cpu_to_le64(x) ___constant_swab64((x))
+#define __constant_le64_to_cpu(x) ___constant_swab64((x))
+#define __constant_cpu_to_le32(x) ___constant_swab32((x))
+#define __constant_le32_to_cpu(x) ___constant_swab32((x))
+#define __constant_cpu_to_le16(x) ___constant_swab16((x))
+#define __constant_le16_to_cpu(x) ___constant_swab16((x))
+#define __constant_cpu_to_be64(x) ((__u64)(x))
+#define __constant_be64_to_cpu(x) ((__u64)(x))
+#define __constant_cpu_to_be32(x) ((__u32)(x))
+#define __constant_be32_to_cpu(x) ((__u32)(x))
+#define __constant_cpu_to_be16(x) ((__u16)(x))
+#define __constant_be16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_le64(x) __swab64((x))
+#define __le64_to_cpu(x) __swab64((x))
+#define __cpu_to_le32(x) __swab32((x))
+#define __le32_to_cpu(x) __swab32((x))
+#define __cpu_to_le16(x) __swab16((x))
+#define __le16_to_cpu(x) __swab16((x))
+#define __cpu_to_be64(x) ((__u64)(x))
+#define __be64_to_cpu(x) ((__u64)(x))
+#define __cpu_to_be32(x) ((__u32)(x))
+#define __be32_to_cpu(x) ((__u32)(x))
+#define __cpu_to_be16(x) ((__u16)(x))
+#define __be16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_le64p(x) __swab64p((x))
+#define __le64_to_cpup(x) __swab64p((x))
+#define __cpu_to_le32p(x) __swab32p((x))
+#define __le32_to_cpup(x) __swab32p((x))
+#define __cpu_to_le16p(x) __swab16p((x))
+#define __le16_to_cpup(x) __swab16p((x))
+#define __cpu_to_be64p(x) (*(__u64*)(x))
+#define __be64_to_cpup(x) (*(__u64*)(x))
+#define __cpu_to_be32p(x) (*(__u32*)(x))
+#define __be32_to_cpup(x) (*(__u32*)(x))
+#define __cpu_to_be16p(x) (*(__u16*)(x))
+#define __be16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_le64s(x) __swab64s((x))
+#define __le64_to_cpus(x) __swab64s((x))
+#define __cpu_to_le32s(x) __swab32s((x))
+#define __le32_to_cpus(x) __swab32s((x))
+#define __cpu_to_le16s(x) __swab16s((x))
+#define __le16_to_cpus(x) __swab16s((x))
+#define __cpu_to_be64s(x) do {} while (0)
+#define __be64_to_cpus(x) do {} while (0)
+#define __cpu_to_be32s(x) do {} while (0)
+#define __be32_to_cpus(x) do {} while (0)
+#define __cpu_to_be16s(x) do {} while (0)
+#define __be16_to_cpus(x) do {} while (0)
+
+#include <linux/byteorder/generic.h>
+
+#endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */
diff --git a/xen/include/xeno/byteorder/generic.h b/xen/include/xeno/byteorder/generic.h
new file mode 100644
index 0000000000..d3d63a5a1e
--- /dev/null
+++ b/xen/include/xeno/byteorder/generic.h
@@ -0,0 +1,180 @@
+#ifndef _LINUX_BYTEORDER_GENERIC_H
+#define _LINUX_BYTEORDER_GENERIC_H
+
+/*
+ * linux/byteorder_generic.h
+ * Generic Byte-reordering support
+ *
+ * Francois-Rene Rideau <fare@tunes.org> 19970707
+ * gathered all the good ideas from all asm-foo/byteorder.h into one file,
+ * cleaned them up.
+ * I hope it is compliant with non-GCC compilers.
+ * I decided to put __BYTEORDER_HAS_U64__ in byteorder.h,
+ * because I wasn't sure it would be ok to put it in types.h
+ * Upgraded it to 2.1.43
+ * Francois-Rene Rideau <fare@tunes.org> 19971012
+ * Upgraded it to 2.1.57
+ * to please Linus T., replaced huge #ifdef's between little/big endian
+ * by nestedly #include'd files.
+ * Francois-Rene Rideau <fare@tunes.org> 19971205
+ * Made it to 2.1.71; now a facelift:
+ * Put files under include/linux/byteorder/
+ * Split swab from generic support.
+ *
+ * TODO:
+ * = Regular kernel maintainers could also replace all these manual
+ * byteswap macros that remain, disseminated among drivers,
+ * after some grep or the sources...
+ * = Linus might want to rename all these macros and files to fit his taste,
+ * to fit his personal naming scheme.
+ * = it seems that a few drivers would also appreciate
+ * nybble swapping support...
+ * = every architecture could add their byteswap macro in asm/byteorder.h
+ * see how some architectures already do (i386, alpha, ppc, etc)
+ * = cpu_to_beXX and beXX_to_cpu might some day need to be well
+ * distinguished throughout the kernel. This is not the case currently,
+ *   since little-endian, big-endian, and pdp-endian machines don't need it.
+ * But this might be the case for, say, a port of Linux to 20/21 bit
+ * architectures (and F21 Linux addict around?).
+ */
+
+/*
+ * The following macros are to be defined by <asm/byteorder.h>:
+ *
+ * Conversion of long and short int between network and host format
+ * ntohl(__u32 x)
+ * ntohs(__u16 x)
+ * htonl(__u32 x)
+ * htons(__u16 x)
+ * It seems that some programs (which? where? or perhaps a standard? POSIX?)
+ * might like the above to be functions, not macros (why?).
+ * If that's true, then detect them, and take measures.
+ * Anyway, the measure is: define only ___ntohl as a macro instead,
+ * and in a separate file, have
+ * unsigned long inline ntohl(x){return ___ntohl(x);}
+ *
+ * The same for constant arguments
+ * __constant_ntohl(__u32 x)
+ * __constant_ntohs(__u16 x)
+ * __constant_htonl(__u32 x)
+ * __constant_htons(__u16 x)
+ *
+ * Conversion of XX-bit integers (16- 32- or 64-)
+ * between native CPU format and little/big endian format
+ * 64-bit stuff only defined for proper architectures
+ * cpu_to_[bl]eXX(__uXX x)
+ * [bl]eXX_to_cpu(__uXX x)
+ *
+ * The same, but takes a pointer to the value to convert
+ * cpu_to_[bl]eXXp(__uXX x)
+ * [bl]eXX_to_cpup(__uXX x)
+ *
+ * The same, but change in situ
+ * cpu_to_[bl]eXXs(__uXX x)
+ * [bl]eXX_to_cpus(__uXX x)
+ *
+ * See asm-foo/byteorder.h for examples of how to provide
+ * architecture-optimized versions
+ *
+ */
+
+
+#if defined(__KERNEL__)
+/*
+ * inside the kernel, we can use nicknames;
+ * outside of it, we must avoid POSIX namespace pollution...
+ */
+#define cpu_to_le64 __cpu_to_le64
+#define le64_to_cpu __le64_to_cpu
+#define cpu_to_le32 __cpu_to_le32
+#define le32_to_cpu __le32_to_cpu
+#define cpu_to_le16 __cpu_to_le16
+#define le16_to_cpu __le16_to_cpu
+#define cpu_to_be64 __cpu_to_be64
+#define be64_to_cpu __be64_to_cpu
+#define cpu_to_be32 __cpu_to_be32
+#define be32_to_cpu __be32_to_cpu
+#define cpu_to_be16 __cpu_to_be16
+#define be16_to_cpu __be16_to_cpu
+#define cpu_to_le64p __cpu_to_le64p
+#define le64_to_cpup __le64_to_cpup
+#define cpu_to_le32p __cpu_to_le32p
+#define le32_to_cpup __le32_to_cpup
+#define cpu_to_le16p __cpu_to_le16p
+#define le16_to_cpup __le16_to_cpup
+#define cpu_to_be64p __cpu_to_be64p
+#define be64_to_cpup __be64_to_cpup
+#define cpu_to_be32p __cpu_to_be32p
+#define be32_to_cpup __be32_to_cpup
+#define cpu_to_be16p __cpu_to_be16p
+#define be16_to_cpup __be16_to_cpup
+#define cpu_to_le64s __cpu_to_le64s
+#define le64_to_cpus __le64_to_cpus
+#define cpu_to_le32s __cpu_to_le32s
+#define le32_to_cpus __le32_to_cpus
+#define cpu_to_le16s __cpu_to_le16s
+#define le16_to_cpus __le16_to_cpus
+#define cpu_to_be64s __cpu_to_be64s
+#define be64_to_cpus __be64_to_cpus
+#define cpu_to_be32s __cpu_to_be32s
+#define be32_to_cpus __be32_to_cpus
+#define cpu_to_be16s __cpu_to_be16s
+#define be16_to_cpus __be16_to_cpus
+#endif
+
+
+/*
+ * Handle ntohl and such. These have various compatibility
+ * issues - like we want to give the prototype even though we
+ * also have a macro for them in case some strange program
+ * wants to take the address of the thing or something..
+ *
+ * Note that these used to return a "long" in libc5, even though
+ * long is often 64-bit these days.. Thus the casts.
+ *
+ * They have to be macros in order to do the constant folding
+ * correctly - if the argument is passed into an inline function
+ * it is no longer constant according to gcc.
+ */
+
+#undef ntohl
+#undef ntohs
+#undef htonl
+#undef htons
+
+/*
+ * Do the prototypes. Somebody might want to take the
+ * address or some such sick thing..
+ */
+#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2)
+extern __u32 ntohl(__u32);
+extern __u32 htonl(__u32);
+#else
+extern unsigned long int ntohl(unsigned long int);
+extern unsigned long int htonl(unsigned long int);
+#endif
+extern unsigned short int ntohs(unsigned short int);
+extern unsigned short int htons(unsigned short int);
+
+
+#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+
+#define ___htonl(x) __cpu_to_be32(x)
+#define ___htons(x) __cpu_to_be16(x)
+#define ___ntohl(x) __be32_to_cpu(x)
+#define ___ntohs(x) __be16_to_cpu(x)
+
+#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2)
+#define htonl(x) ___htonl(x)
+#define ntohl(x) ___ntohl(x)
+#else
+#define htonl(x) ((unsigned long)___htonl(x))
+#define ntohl(x) ((unsigned long)___ntohl(x))
+#endif
+#define htons(x) ___htons(x)
+#define ntohs(x) ___ntohs(x)
+
+#endif /* OPTIMIZE */
+
+
+#endif /* _LINUX_BYTEORDER_GENERIC_H */
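The conversion macros are meant to be applied exactly at the boundary where
a fixed-byte-order value enters or leaves the CPU. The descriptor layout
below is invented for illustration (a device expecting little-endian
fields):

    struct xyz_desc {
        __u32 addr;
        __u16 len;
    };

    static void fill_desc(struct xyz_desc *d, __u32 addr, __u16 len)
    {
        d->addr = cpu_to_le32(addr);   /* no-op on i386, byteswap on BE */
        d->len  = cpu_to_le16(len);
    }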
diff --git a/xen/include/xeno/byteorder/little_endian.h b/xen/include/xeno/byteorder/little_endian.h
new file mode 100644
index 0000000000..1431663621
--- /dev/null
+++ b/xen/include/xeno/byteorder/little_endian.h
@@ -0,0 +1,68 @@
+#ifndef _LINUX_BYTEORDER_LITTLE_ENDIAN_H
+#define _LINUX_BYTEORDER_LITTLE_ENDIAN_H
+
+#ifndef __LITTLE_ENDIAN
+#define __LITTLE_ENDIAN 1234
+#endif
+#ifndef __LITTLE_ENDIAN_BITFIELD
+#define __LITTLE_ENDIAN_BITFIELD
+#endif
+
+#include <linux/byteorder/swab.h>
+
+#define __constant_htonl(x) ___constant_swab32((x))
+#define __constant_ntohl(x) ___constant_swab32((x))
+#define __constant_htons(x) ___constant_swab16((x))
+#define __constant_ntohs(x) ___constant_swab16((x))
+#define __constant_cpu_to_le64(x) ((__u64)(x))
+#define __constant_le64_to_cpu(x) ((__u64)(x))
+#define __constant_cpu_to_le32(x) ((__u32)(x))
+#define __constant_le32_to_cpu(x) ((__u32)(x))
+#define __constant_cpu_to_le16(x) ((__u16)(x))
+#define __constant_le16_to_cpu(x) ((__u16)(x))
+#define __constant_cpu_to_be64(x) ___constant_swab64((x))
+#define __constant_be64_to_cpu(x) ___constant_swab64((x))
+#define __constant_cpu_to_be32(x) ___constant_swab32((x))
+#define __constant_be32_to_cpu(x) ___constant_swab32((x))
+#define __constant_cpu_to_be16(x) ___constant_swab16((x))
+#define __constant_be16_to_cpu(x) ___constant_swab16((x))
+#define __cpu_to_le64(x) ((__u64)(x))
+#define __le64_to_cpu(x) ((__u64)(x))
+#define __cpu_to_le32(x) ((__u32)(x))
+#define __le32_to_cpu(x) ((__u32)(x))
+#define __cpu_to_le16(x) ((__u16)(x))
+#define __le16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_be64(x) __swab64((x))
+#define __be64_to_cpu(x) __swab64((x))
+#define __cpu_to_be32(x) __swab32((x))
+#define __be32_to_cpu(x) __swab32((x))
+#define __cpu_to_be16(x) __swab16((x))
+#define __be16_to_cpu(x) __swab16((x))
+#define __cpu_to_le64p(x) (*(__u64*)(x))
+#define __le64_to_cpup(x) (*(__u64*)(x))
+#define __cpu_to_le32p(x) (*(__u32*)(x))
+#define __le32_to_cpup(x) (*(__u32*)(x))
+#define __cpu_to_le16p(x) (*(__u16*)(x))
+#define __le16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_be64p(x) __swab64p((x))
+#define __be64_to_cpup(x) __swab64p((x))
+#define __cpu_to_be32p(x) __swab32p((x))
+#define __be32_to_cpup(x) __swab32p((x))
+#define __cpu_to_be16p(x) __swab16p((x))
+#define __be16_to_cpup(x) __swab16p((x))
+#define __cpu_to_le64s(x) do {} while (0)
+#define __le64_to_cpus(x) do {} while (0)
+#define __cpu_to_le32s(x) do {} while (0)
+#define __le32_to_cpus(x) do {} while (0)
+#define __cpu_to_le16s(x) do {} while (0)
+#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) __swab64s((x))
+#define __be64_to_cpus(x) __swab64s((x))
+#define __cpu_to_be32s(x) __swab32s((x))
+#define __be32_to_cpus(x) __swab32s((x))
+#define __cpu_to_be16s(x) __swab16s((x))
+#define __be16_to_cpus(x) __swab16s((x))
+
+#include <linux/byteorder/generic.h>
+
+#endif /* _LINUX_BYTEORDER_LITTLE_ENDIAN_H */
diff --git a/xen/include/xeno/byteorder/pdp_endian.h b/xen/include/xeno/byteorder/pdp_endian.h
new file mode 100644
index 0000000000..618631cbc6
--- /dev/null
+++ b/xen/include/xeno/byteorder/pdp_endian.h
@@ -0,0 +1,88 @@
+#ifndef _LINUX_BYTEORDER_PDP_ENDIAN_H
+#define _LINUX_BYTEORDER_PDP_ENDIAN_H
+
+/*
+ * Could have been named NUXI-endian, but we use the same name as in glibc.
+ * Hopefully only the PDP and its evolutions (old VAXen in compatibility mode)
+ * should ever use this braindead byteorder.
+ * This file *should* work, but has not been tested.
+ *
+ * little-endian is 1234; big-endian is 4321; nuxi/pdp-endian is 3412
+ *
+ * I thought vaxen were NUXI-endian, but was told they were correct-endian
+ * (little-endian), though indeed there existed NUXI-endian machines
+ * (DEC PDP-11 and old VAXen in compatibility mode).
+ * This makes this file a bit useless, except as a proof of concept.
+ *
+ * But what does a __u64 look like: is it 34127856 or 78563412 ???
+ * I don't dare imagine! Hence, no 64-bit byteorder support yet.
+ * Hopefully, 64-bit pdp-endian support shouldn't ever be required.
+ *
+ */
+
+#ifndef __PDP_ENDIAN
+#define __PDP_ENDIAN 3412
+#endif
+#ifndef __PDP_ENDIAN_BITFIELD
+#define __PDP_ENDIAN_BITFIELD
+#endif
+
+#include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
+
+#define __constant_htonl(x) ___constant_swahb32((x))
+#define __constant_ntohl(x) ___constant_swahb32((x))
+#define __constant_htons(x) ___constant_swab16((x))
+#define __constant_ntohs(x) ___constant_swab16((x))
+#define __constant_cpu_to_le64(x) I DON'T KNOW
+#define __constant_le64_to_cpu(x) I DON'T KNOW
+#define __constant_cpu_to_le32(x) ___constant_swahw32((x))
+#define __constant_le32_to_cpu(x) ___constant_swahw32((x))
+#define __constant_cpu_to_le16(x) ((__u16)(x))
+#define __constant_le16_to_cpu(x) ((__u16)(x))
+#define __constant_cpu_to_be64(x) I DON'T KNOW
+#define __constant_be64_to_cpu(x) I DON'T KNOW
+#define __constant_cpu_to_be32(x) ___constant_swahb32((x))
+#define __constant_be32_to_cpu(x) ___constant_swahb32((x))
+#define __constant_cpu_to_be16(x) ___constant_swab16((x))
+#define __constant_be16_to_cpu(x) ___constant_swab16((x))
+#define __cpu_to_le64(x) I DON'T KNOW
+#define __le64_to_cpu(x) I DON'T KNOW
+#define __cpu_to_le32(x) ___swahw32((x))
+#define __le32_to_cpu(x) ___swahw32((x))
+#define __cpu_to_le16(x) ((__u16)(x))
+#define __le16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_be64(x) I DON'T KNOW
+#define __be64_to_cpu(x) I DON'T KNOW
+#define __cpu_to_be32(x) __swahb32((x))
+#define __be32_to_cpu(x) __swahb32((x))
+#define __cpu_to_be16(x) __swab16((x))
+#define __be16_to_cpu(x) __swab16((x))
+#define __cpu_to_le64p(x) I DON'T KNOW
+#define __le64_to_cpup(x) I DON'T KNOW
+#define __cpu_to_le32p(x) ___swahw32p((x))
+#define __le32_to_cpup(x) ___swahw32p((x))
+#define __cpu_to_le16p(x) (*(__u16*)(x))
+#define __le16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_be64p(x) I DON'T KNOW
+#define __be64_to_cpup(x) I DON'T KNOW
+#define __cpu_to_be32p(x) __swahb32p((x))
+#define __be32_to_cpup(x) __swahb32p((x))
+#define __cpu_to_be16p(x) __swab16p((x))
+#define __be16_to_cpup(x) __swab16p((x))
+#define __cpu_to_le64s(x) I DON'T KNOW
+#define __le64_to_cpus(x) I DON'T KNOW
+#define __cpu_to_le32s(x) ___swahw32s((x))
+#define __le32_to_cpus(x) ___swahw32s((x))
+#define __cpu_to_le16s(x) do {} while (0)
+#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) I DON'T KNOW
+#define __be64_to_cpus(x) I DON'T KNOW
+#define __cpu_to_be32s(x) __swahb32s((x))
+#define __be32_to_cpus(x) __swahb32s((x))
+#define __cpu_to_be16s(x) __swab16s((x))
+#define __be16_to_cpus(x) __swab16s((x))
+
+#include <linux/byteorder/generic.h>
+
+#endif /* _LINUX_BYTEORDER_PDP_ENDIAN_H */
diff --git a/xen/include/xeno/byteorder/swab.h b/xen/include/xeno/byteorder/swab.h
new file mode 100644
index 0000000000..814b4519ff
--- /dev/null
+++ b/xen/include/xeno/byteorder/swab.h
@@ -0,0 +1,190 @@
+#ifndef _LINUX_BYTEORDER_SWAB_H
+#define _LINUX_BYTEORDER_SWAB_H
+
+/*
+ * linux/byteorder/swab.h
+ * Byte-swapping, independently from CPU endianness
+ * swabXX[ps]?(foo)
+ *
+ * Francois-Rene Rideau <fare@tunes.org> 19971205
+ * separated swab functions from cpu_to_XX,
+ * to clean up support for bizarre-endian architectures.
+ *
+ * See asm-i386/byteorder.h and such for examples of how to provide
+ * architecture-dependent optimized versions
+ *
+ */
+
+/* casts are necessary for constants, because we never know for sure
+ * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
+ */
+#define ___swab16(x) \
+({ \
+ __u16 __x = (x); \
+ ((__u16)( \
+ (((__u16)(__x) & (__u16)0x00ffU) << 8) | \
+ (((__u16)(__x) & (__u16)0xff00U) >> 8) )); \
+})
+
+#define ___swab32(x) \
+({ \
+ __u32 __x = (x); \
+ ((__u32)( \
+ (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
+ (((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \
+ (((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \
+ (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
+})
+
+#define ___swab64(x) \
+({ \
+ __u64 __x = (x); \
+ ((__u64)( \
+ (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \
+ (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \
+ (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \
+ (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \
+ (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \
+ (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
+ (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
+ (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
+})
+
+#define ___constant_swab16(x) \
+ ((__u16)( \
+ (((__u16)(x) & (__u16)0x00ffU) << 8) | \
+ (((__u16)(x) & (__u16)0xff00U) >> 8) ))
+#define ___constant_swab32(x) \
+ ((__u32)( \
+ (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
+ (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \
+ (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \
+ (((__u32)(x) & (__u32)0xff000000UL) >> 24) ))
+#define ___constant_swab64(x) \
+ ((__u64)( \
+ (__u64)(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \
+ (__u64)(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \
+ (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \
+ (__u64)(((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \
+ (__u64)(((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \
+ (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
+ (__u64)(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \
+ (__u64)(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56) ))
+
+/*
+ * provide defaults when no architecture-specific optimization is detected
+ */
+#ifndef __arch__swab16
+# define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); })
+#endif
+#ifndef __arch__swab32
+# define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); })
+#endif
+#ifndef __arch__swab64
+# define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); })
+#endif
+
+#ifndef __arch__swab16p
+# define __arch__swab16p(x) __arch__swab16(*(x))
+#endif
+#ifndef __arch__swab32p
+# define __arch__swab32p(x) __arch__swab32(*(x))
+#endif
+#ifndef __arch__swab64p
+# define __arch__swab64p(x) __arch__swab64(*(x))
+#endif
+
+#ifndef __arch__swab16s
+# define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0)
+#endif
+#ifndef __arch__swab32s
+# define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0)
+#endif
+#ifndef __arch__swab64s
+# define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0)
+#endif
+
+
+/*
+ * Allow constant folding
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+# define __swab16(x) \
+(__builtin_constant_p((__u16)(x)) ? \
+ ___swab16((x)) : \
+ __fswab16((x)))
+# define __swab32(x) \
+(__builtin_constant_p((__u32)(x)) ? \
+ ___swab32((x)) : \
+ __fswab32((x)))
+# define __swab64(x) \
+(__builtin_constant_p((__u64)(x)) ? \
+ ___swab64((x)) : \
+ __fswab64((x)))
+#else
+# define __swab16(x) __fswab16(x)
+# define __swab32(x) __fswab32(x)
+# define __swab64(x) __fswab64(x)
+#endif /* OPTIMIZE */
+
+
+static __inline__ __const__ __u16 __fswab16(__u16 x)
+{
+ return __arch__swab16(x);
+}
+static __inline__ __u16 __swab16p(__u16 *x)
+{
+ return __arch__swab16p(x);
+}
+static __inline__ void __swab16s(__u16 *addr)
+{
+ __arch__swab16s(addr);
+}
+
+static __inline__ __const__ __u32 __fswab32(__u32 x)
+{
+ return __arch__swab32(x);
+}
+static __inline__ __u32 __swab32p(__u32 *x)
+{
+ return __arch__swab32p(x);
+}
+static __inline__ void __swab32s(__u32 *addr)
+{
+ __arch__swab32s(addr);
+}
+
+#ifdef __BYTEORDER_HAS_U64__
+static __inline__ __const__ __u64 __fswab64(__u64 x)
+{
+# ifdef __SWAB_64_THRU_32__
+ __u32 h = x >> 32;
+ __u32 l = x & ((1ULL<<32)-1);
+ return (((__u64)__swab32(l)) << 32) | ((__u64)(__swab32(h)));
+# else
+ return __arch__swab64(x);
+# endif
+}
+static __inline__ __u64 __swab64p(__u64 *x)
+{
+ return __arch__swab64p(x);
+}
+static __inline__ void __swab64s(__u64 *addr)
+{
+ __arch__swab64s(addr);
+}
+#endif /* __BYTEORDER_HAS_U64__ */
+
+#if defined(__KERNEL__)
+#define swab16 __swab16
+#define swab32 __swab32
+#define swab64 __swab64
+#define swab16p __swab16p
+#define swab32p __swab32p
+#define swab64p __swab64p
+#define swab16s __swab16s
+#define swab32s __swab32s
+#define swab64s __swab64s
+#endif
+
+#endif /* _LINUX_BYTEORDER_SWAB_H */
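
The generic fallbacks above reduce to plain mask-and-shift arithmetic. A minimal
user-space sketch of the same semantics (demo_* is a hypothetical name, restated
with <stdint.h> types so it compiles outside the tree):

    #include <stdio.h>
    #include <stdint.h>

    /* Same mask-and-shift logic as ___swab32() above. */
    static uint32_t demo_swab32(uint32_t x)
    {
        return ((x & 0x000000ffUL) << 24) |
               ((x & 0x0000ff00UL) <<  8) |
               ((x & 0x00ff0000UL) >>  8) |
               ((x & 0xff000000UL) >> 24);
    }

    int main(void)
    {
        printf("%#x\n", demo_swab32(0x12345678)); /* prints 0x78563412 */
        return 0;
    }
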
diff --git a/xen/include/xeno/byteorder/swabb.h b/xen/include/xeno/byteorder/swabb.h
new file mode 100644
index 0000000000..d28d9a804d
--- /dev/null
+++ b/xen/include/xeno/byteorder/swabb.h
@@ -0,0 +1,137 @@
+#ifndef _LINUX_BYTEORDER_SWABB_H
+#define _LINUX_BYTEORDER_SWABB_H
+
+/*
+ * linux/byteorder/swabb.h
+ * SWAp Bytes Bizarrely
+ * swaHHXX[ps]?(foo)
+ *
+ * Support for obNUXIous pdp-endian and other bizarre architectures.
+ * Will Linux ever run on such ancient beasts? If not, this file
+ * will be but a programming pearl. Still, it's a reminder that we
+ * shouldn't be making too many assumptions when trying to be portable.
+ *
+ */
+
+/*
+ * Meaning of the names I chose (vaxlinux people feel free to correct them):
+ * swahw32 swap 16-bit half-words in a 32-bit word
+ * swahb32 swap 8-bit halves of each 16-bit half-word in a 32-bit word
+ *
+ * No 64-bit support yet. I don't know NUXI conventions for long longs.
+ * I guarantee it will be a mess when it's there, though :->
+ * It will be even worse if there are conflicting 64-bit conventions.
+ * Hopefully, no one ever used 64-bit objects on NUXI machines.
+ *
+ */
+
+#define ___swahw32(x) \
+({ \
+ __u32 __x = (x); \
+ ((__u32)( \
+ (((__u32)(__x) & (__u32)0x0000ffffUL) << 16) | \
+ (((__u32)(__x) & (__u32)0xffff0000UL) >> 16) )); \
+})
+#define ___swahb32(x) \
+({ \
+ __u32 __x = (x); \
+ ((__u32)( \
+ (((__u32)(__x) & (__u32)0x00ff00ffUL) << 8) | \
+ (((__u32)(__x) & (__u32)0xff00ff00UL) >> 8) )); \
+})
+
+#define ___constant_swahw32(x) \
+ ((__u32)( \
+ (((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \
+ (((__u32)(x) & (__u32)0xffff0000UL) >> 16) ))
+#define ___constant_swahb32(x) \
+ ((__u32)( \
+ (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \
+ (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) ))
+
+/*
+ * provide defaults when no architecture-specific optimization is detected
+ */
+#ifndef __arch__swahw32
+# define __arch__swahw32(x) ___swahw32(x)
+#endif
+#ifndef __arch__swahb32
+# define __arch__swahb32(x) ___swahb32(x)
+#endif
+
+#ifndef __arch__swahw32p
+# define __arch__swahw32p(x) __swahw32(*(x))
+#endif
+#ifndef __arch__swahb32p
+# define __arch__swahb32p(x) __swahb32(*(x))
+#endif
+
+#ifndef __arch__swahw32s
+# define __arch__swahw32s(x) do { *(x) = __swahw32p((x)); } while (0)
+#endif
+#ifndef __arch__swahb32s
+# define __arch__swahb32s(x) do { *(x) = __swahb32p((x)); } while (0)
+#endif
+
+
+/*
+ * Allow constant folding
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+# define __swahw32(x) \
+(__builtin_constant_p((__u32)(x)) ? \
+ ___swahw32((x)) : \
+ __fswahw32((x)))
+# define __swahb32(x) \
+(__builtin_constant_p((__u32)(x)) ? \
+ ___swahb32((x)) : \
+ __fswahb32((x)))
+#else
+# define __swahw32(x) __fswahw32(x)
+# define __swahb32(x) __fswahb32(x)
+#endif /* OPTIMIZE */
+
+
+static __inline__ __const__ __u32 __fswahw32(__u32 x)
+{
+ return __arch__swahw32(x);
+}
+static __inline__ __u32 __swahw32p(__u32 *x)
+{
+ return __arch__swahw32p(x);
+}
+static __inline__ void __swahw32s(__u32 *addr)
+{
+ __arch__swahw32s(addr);
+}
+
+
+static __inline__ __const__ __u32 __fswahb32(__u32 x)
+{
+ return __arch__swahb32(x);
+}
+static __inline__ __u32 __swahb32p(__u32 *x)
+{
+ return __arch__swahb32p(x);
+}
+static __inline__ void __swahb32s(__u32 *addr)
+{
+ __arch__swahb32s(addr);
+}
+
+#ifdef __BYTEORDER_HAS_U64__
+/*
+ * Not supported yet
+ */
+#endif /* __BYTEORDER_HAS_U64__ */
+
+#if defined(__KERNEL__)
+#define swahw32 __swahw32
+#define swahb32 __swahb32
+#define swahw32p __swahw32p
+#define swahb32p __swahb32p
+#define swahw32s __swahw32s
+#define swahb32s __swahb32s
+#endif
+
+#endif /* _LINUX_BYTEORDER_SWABB_H */
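
For the mixed-endian case it may help to see the two swaps side by side. A
standalone sketch (hypothetical demo_* names, mirroring ___swahw32/___swahb32):

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t demo_swahw32(uint32_t x) /* swap 16-bit half-words */
    {
        return ((x & 0x0000ffffUL) << 16) | ((x & 0xffff0000UL) >> 16);
    }

    static uint32_t demo_swahb32(uint32_t x) /* swap bytes in each half-word */
    {
        return ((x & 0x00ff00ffUL) << 8) | ((x & 0xff00ff00UL) >> 8);
    }

    int main(void)
    {
        printf("%#x\n", demo_swahw32(0x12345678)); /* 0x56781234 */
        printf("%#x\n", demo_swahb32(0x12345678)); /* 0x34127856 */
        return 0;
    }
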
diff --git a/xen/include/xeno/cache.h b/xen/include/xeno/cache.h
new file mode 100644
index 0000000000..73a3be7f9f
--- /dev/null
+++ b/xen/include/xeno/cache.h
@@ -0,0 +1,37 @@
+#ifndef __LINUX_CACHE_H
+#define __LINUX_CACHE_H
+
+#include <xeno/config.h>
+#include <asm/cache.h>
+
+#ifndef L1_CACHE_ALIGN
+#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
+#endif
+
+#ifndef SMP_CACHE_BYTES
+#define SMP_CACHE_BYTES L1_CACHE_BYTES
+#endif
+
+#ifndef ____cacheline_aligned
+#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#endif
+
+#ifndef ____cacheline_aligned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_aligned_in_smp ____cacheline_aligned
+#else
+#define ____cacheline_aligned_in_smp
+#endif /* CONFIG_SMP */
+#endif
+
+#ifndef __cacheline_aligned
+#ifdef MODULE
+#define __cacheline_aligned ____cacheline_aligned
+#else
+#define __cacheline_aligned \
+ __attribute__((__aligned__(SMP_CACHE_BYTES), \
+ __section__(".data.cacheline_aligned")))
+#endif
+#endif /* __cacheline_aligned */
+
+#endif /* __LINUX_CACHE_H */
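
The point of the aligned variants is to keep hot per-CPU data from sharing a
cache line. A sketch of typical usage (hypothetical struct, not from this tree):

    /* Padding each per-CPU slot out to SMP_CACHE_BYTES stops two CPUs
     * updating adjacent slots from bouncing the same cache line. */
    struct cpu_stats {
        unsigned long irq_count;
        unsigned long idle_ticks;
    } ____cacheline_aligned;   /* sizeof is rounded up to the alignment */

    static struct cpu_stats stats[NR_CPUS]; /* each element starts a fresh line */
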
diff --git a/xen/include/xeno/config.h b/xen/include/xeno/config.h
new file mode 100644
index 0000000000..7d7205b69a
--- /dev/null
+++ b/xen/include/xeno/config.h
@@ -0,0 +1,136 @@
+/******************************************************************************
+ * config.h
+ *
+ * A Linux-style configuration list.
+ */
+
+#ifndef __XENO_CONFIG_H__
+#define __XENO_CONFIG_H__
+
+#define CONFIG_X86 1
+
+#define CONFIG_SMP 1
+#define CONFIG_X86_LOCAL_APIC 1
+#define CONFIG_X86_IO_APIC 1
+#define CONFIG_X86_L1_CACHE_SHIFT 5
+
+#define CONFIG_PCI 1
+#define CONFIG_PCI_BIOS 1
+#define CONFIG_PCI_DIRECT 1
+
+#define CONFIG_IDE 1
+#define CONFIG_BLK_DEV_IDE 1
+#define CONFIG_BLK_DEV_IDEDMA 1
+#define CONFIG_BLK_DEV_IDEPCI 1
+#define CONFIG_IDEDISK_MULTI_MODE 1
+#define CONFIG_IDEDISK_STROKE 1
+#define CONFIG_IDEPCI_SHARE_IRQ 1
+#define CONFIG_BLK_DEV_IDEDMA_PCI 1
+#define CONFIG_IDEDMA_PCI_AUTO 1
+#define CONFIG_IDEDMA_AUTO 1
+#define CONFIG_BLK_DEV_IDE_MODES 1
+#define CONFIG_BLK_DEV_PIIX 1
+
+#define CONFIG_SCSI 1
+#define CONFIG_BLK_DEV_SD 1
+#define CONFIG_SD_EXTRA_DEVS 40
+#define CONFIG_SCSI_MULTI_LUN 1
+
+#define HZ 100
+
+/* Just to keep compiler happy. */
+#define SMP_CACHE_BYTES 64
+#define NR_CPUS 16
+#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#define ____cacheline_aligned __cacheline_aligned
+
+/*** Hypervisor owns top 64MB of virtual address space. ***/
+#define HYPERVISOR_VIRT_START (0xFC000000UL)
+
+/*
+ * First 4MB are mapped read-only for all. It's for the machine->physical
+ * mapping table (MPT table). The following are virtual addresses.
+ */
+#define READONLY_MPT_VIRT_START (HYPERVISOR_VIRT_START)
+#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (4*1024*1024))
+/*
+ * Next 16MB is fixed monitor space, which is part of a 48MB direct-mapped
+ * memory region. The following are machine addresses.
+ */
+#define MAX_MONITOR_ADDRESS (16*1024*1024)
+#define MAX_DMA_ADDRESS (16*1024*1024)
+#define MAX_DIRECTMAP_ADDRESS (48*1024*1024)
+/* And the virtual addresses for the direct-map region... */
+#define DIRECTMAP_VIRT_START (READONLY_MPT_VIRT_END)
+#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
+#define MONITOR_VIRT_START (DIRECTMAP_VIRT_START)
+#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS)
+#define RDWR_MPT_VIRT_START (MONITOR_VIRT_END)
+#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (4*1024*1024))
+#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END)
+#define FRAMETABLE_VIRT_END (DIRECTMAP_VIRT_END)
+/* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
+#define PERDOMAIN_VIRT_START (DIRECTMAP_VIRT_END)
+#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (4*1024*1024))
+/* Penultimate 4MB of virtual address space used for domain page mappings. */
+#define MAPCACHE_VIRT_START (PERDOMAIN_VIRT_END)
+#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + (4*1024*1024))
+/* Final 4MB of virtual address space used for ioremap(). */
+#define IOREMAP_VIRT_START (MAPCACHE_VIRT_END)
+#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (4*1024*1024))
+
+/* Linkage for x86 */
+#define FASTCALL(x) x __attribute__((regparm(3)))
+#define asmlinkage __attribute__((regparm(0)))
+#define __ALIGN .align 16,0x90
+#define __ALIGN_STR ".align 16,0x90"
+#define SYMBOL_NAME_STR(X) #X
+#define SYMBOL_NAME(X) X
+#define SYMBOL_NAME_LABEL(X) X##:
+#ifdef __ASSEMBLY__
+#define ALIGN __ALIGN
+#define ALIGN_STR __ALIGN_STR
+#define ENTRY(name) \
+ .globl SYMBOL_NAME(name); \
+ ALIGN; \
+ SYMBOL_NAME_LABEL(name)
+#endif
+
+/* syslog levels ==> nothing! */
+#define KERN_NOTICE
+#define KERN_WARNING
+#define KERN_DEBUG
+#define KERN_INFO
+#define KERN_ERR
+#define KERN_CRIT
+#define KERN_EMERG
+#define KERN_ALERT
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#define __HYPERVISOR_CS 0x30
+#define __HYPERVISOR_DS 0x38
+#define __GUEST_CS 0x11
+#define __GUEST_DS 0x19
+
+#define NR_syscalls 255
+
+#define offsetof(_p,_f) ((unsigned long)&(((_p *)0)->_f))
+#define struct_cpy(_x,_y) (memcpy((_x),(_y),sizeof(*(_x))))
+
+#define likely(_x) (_x)
+#define unlikely(_x) (_x)
+
+#define dev_probe_lock() ((void)0)
+#define dev_probe_unlock() ((void)0)
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define capable(_c) 0
+
+#ifndef __ASSEMBLY__
+extern void __out_of_line_bug(int line) __attribute__((noreturn));
+#define out_of_line_bug() __out_of_line_bug(__LINE__)
+#endif
+
+#endif /* __XENO_CONFIG_H__ */
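
As a sanity check, the regions carved out above tile the hypervisor's 64MB
reservation exactly: 4MB read-only MPT, the 48MB direct map (which itself
contains the monitor, RDWR MPT and frame table), then 4MB each for per-domain
mappings, the map cache and ioremap(). A throwaway user-space check of the
arithmetic:

    #include <stdio.h>

    int main(void)
    {
        unsigned long mb = 1024 * 1024;
        /* RO MPT + direct map + per-domain + map cache + ioremap */
        unsigned long total = (4 + 48 + 4 + 4 + 4) * mb;
        printf("%lu MB\n", total / mb); /* prints 64 */
        return 0;
    }
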
diff --git a/xen/include/xeno/ctype.h b/xen/include/xeno/ctype.h
new file mode 100644
index 0000000000..afa3639229
--- /dev/null
+++ b/xen/include/xeno/ctype.h
@@ -0,0 +1,54 @@
+#ifndef _LINUX_CTYPE_H
+#define _LINUX_CTYPE_H
+
+/*
+ * NOTE! This ctype does not handle EOF like the standard C
+ * library is required to.
+ */
+
+#define _U 0x01 /* upper */
+#define _L 0x02 /* lower */
+#define _D 0x04 /* digit */
+#define _C 0x08 /* cntrl */
+#define _P 0x10 /* punct */
+#define _S 0x20 /* white space (space/lf/tab) */
+#define _X 0x40 /* hex digit */
+#define _SP 0x80 /* hard space (0x20) */
+
+extern unsigned char _ctype[];
+
+#define __ismask(x) (_ctype[(int)(unsigned char)(x)])
+
+#define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0)
+#define isalpha(c) ((__ismask(c)&(_U|_L)) != 0)
+#define iscntrl(c) ((__ismask(c)&(_C)) != 0)
+#define isdigit(c) ((__ismask(c)&(_D)) != 0)
+#define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0)
+#define islower(c) ((__ismask(c)&(_L)) != 0)
+#define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0)
+#define ispunct(c) ((__ismask(c)&(_P)) != 0)
+#define isspace(c) ((__ismask(c)&(_S)) != 0)
+#define isupper(c) ((__ismask(c)&(_U)) != 0)
+#define isxdigit(c) ((__ismask(c)&(_D|_X)) != 0)
+
+#define isascii(c) (((unsigned char)(c))<=0x7f)
+#define toascii(c) (((unsigned char)(c))&0x7f)
+
+static inline unsigned char __tolower(unsigned char c)
+{
+ if (isupper(c))
+ c -= 'A'-'a';
+ return c;
+}
+
+static inline unsigned char __toupper(unsigned char c)
+{
+ if (islower(c))
+ c -= 'a'-'A';
+ return c;
+}
+
+#define tolower(c) __tolower(c)
+#define toupper(c) __toupper(c)
+
+#endif
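
Each classification macro is one table lookup plus a mask test. A reduced
user-space sketch of the same idea (toy table covering digits only, not the
kernel's _ctype[]):

    #include <stdio.h>

    #define MY_D 0x04 /* digit */

    static unsigned char my_ctype[128];

    int main(void)
    {
        int c;
        for (c = '0'; c <= '9'; c++)
            my_ctype[c] = MY_D;
        printf("%d %d\n",
               (my_ctype['7'] & MY_D) != 0,  /* 1 */
               (my_ctype[' '] & MY_D) != 0); /* 0 */
        return 0;
    }
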
diff --git a/xen/include/xeno/delay.h b/xen/include/xeno/delay.h
new file mode 100644
index 0000000000..9d70ef035f
--- /dev/null
+++ b/xen/include/xeno/delay.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_DELAY_H
+#define _LINUX_DELAY_H
+
+/* Copyright (C) 1993 Linus Torvalds */
+
+#include <asm/delay.h>
+#define mdelay(n) (\
+ {unsigned long msec=(n); while (msec--) udelay(1000);})
+
+#endif /* defined(_LINUX_DELAY_H) */
diff --git a/xen/include/xeno/dom0_ops.h b/xen/include/xeno/dom0_ops.h
new file mode 100644
index 0000000000..49a5842fab
--- /dev/null
+++ b/xen/include/xeno/dom0_ops.h
@@ -0,0 +1,63 @@
+/******************************************************************************
+ * dom0_ops.h
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Copyright (c) 2002, K A Fraser, B Dragovic
+ */
+
+#ifndef __DOM0_OPS_H__
+#define __DOM0_OPS_H__
+
+#define DOM0_NEWDOMAIN 0
+#define DOM0_KILLDOMAIN 1
+#define DOM0_GETMEMLIST 2
+#define DOM0_STARTDOM 4
+
+#define MAX_CMD_LEN 256
+
+typedef struct dom0_newdomain_st
+{
+ unsigned int domain; // return parameter
+ unsigned int memory_kb;
+ unsigned int num_vifs; // temporary
+ unsigned long pg_head; // return parameter
+} dom0_newdomain_t;
+
+typedef struct dom0_killdomain_st
+{
+ unsigned int domain;
+} dom0_killdomain_t;
+
+typedef struct dom0_getmemlist_st
+{
+ unsigned long start_pfn;
+ unsigned long num_pfns;
+ void *buffer;
+} dom0_getmemlist_t;
+
+typedef struct domain_launch
+{
+ unsigned int domain;
+ unsigned long l2_pgt_addr;
+ unsigned long virt_load_addr;
+ unsigned long virt_shinfo_addr;
+ unsigned long virt_startinfo_addr;
+ unsigned int num_vifs;
+ char cmd_line[MAX_CMD_LEN];
+} dom_meminfo_t;
+
+typedef struct dom0_op_st
+{
+ unsigned long cmd;
+ union
+ {
+ dom0_newdomain_t newdomain;
+ dom0_killdomain_t killdomain;
+ dom0_getmemlist_t getmemlist;
+ dom_meminfo_t meminfo;
+ }
+ u;
+} dom0_op_t;
+
+#endif
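
A control tool fills in cmd and the matching arm of the union. A sketch of a
request (field names are from the structs above; how the structure is actually
handed to the hypervisor is defined elsewhere in the tree and not shown):

    dom0_op_t op;

    op.cmd = DOM0_NEWDOMAIN;
    op.u.newdomain.memory_kb = 64 * 1024; /* 64MB guest */
    op.u.newdomain.num_vifs  = 1;
    /* op.u.newdomain.domain and .pg_head are return parameters,
     * filled in by the hypervisor. */
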
diff --git a/xen/include/xeno/elevator.h b/xen/include/xeno/elevator.h
new file mode 100644
index 0000000000..1a8bb5c39a
--- /dev/null
+++ b/xen/include/xeno/elevator.h
@@ -0,0 +1,104 @@
+#ifndef _LINUX_ELEVATOR_H
+#define _LINUX_ELEVATOR_H
+
+typedef void (elevator_fn) (struct request *, elevator_t *,
+ struct list_head *,
+ struct list_head *, int);
+
+typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *,
+ struct buffer_head *, int, int);
+
+typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int);
+
+typedef void (elevator_merge_req_fn) (struct request *, struct request *);
+
+struct elevator_s
+{
+ int read_latency;
+ int write_latency;
+
+ elevator_merge_fn *elevator_merge_fn;
+ elevator_merge_cleanup_fn *elevator_merge_cleanup_fn;
+ elevator_merge_req_fn *elevator_merge_req_fn;
+
+ unsigned int queue_ID;
+};
+
+int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int);
+void elevator_noop_merge_req(struct request *, struct request *);
+
+int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
+void elevator_linus_merge_req(struct request *, struct request *);
+
+typedef struct blkelv_ioctl_arg_s {
+ int queue_ID;
+ int read_latency;
+ int write_latency;
+ int max_bomb_segments;
+} blkelv_ioctl_arg_t;
+
+#define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t))
+#define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t))
+
+extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *);
+extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *);
+
+extern void elevator_init(elevator_t *, elevator_t);
+
+/*
+ * Return values from elevator merger
+ */
+#define ELEVATOR_NO_MERGE 0
+#define ELEVATOR_FRONT_MERGE 1
+#define ELEVATOR_BACK_MERGE 2
+
+/*
+ * This is used in the elevator algorithm. We don't prioritise reads
+ * over writes any more --- although reads are more time-critical than
+ * writes, by treating them equally we increase filesystem throughput.
+ * This turns out to give better overall performance. -- sct
+ */
+#define IN_ORDER(s1,s2) \
+ ((((s1)->rq_dev == (s2)->rq_dev && \
+ (s1)->sector < (s2)->sector)) || \
+ (s1)->rq_dev < (s2)->rq_dev)
+
+#define BHRQ_IN_ORDER(bh, rq) \
+ ((((bh)->b_rdev == (rq)->rq_dev && \
+ (bh)->b_rsector < (rq)->sector)) || \
+ (bh)->b_rdev < (rq)->rq_dev)
+
+static inline int elevator_request_latency(elevator_t * elevator, int rw)
+{
+ int latency;
+
+ latency = elevator->read_latency;
+ if (rw != READ)
+ latency = elevator->write_latency;
+
+ return latency;
+}
+
+#define ELEVATOR_NOOP \
+((elevator_t) { \
+ 0, /* read_latency */ \
+ 0, /* write_latency */ \
+ \
+ elevator_noop_merge, /* elevator_merge_fn */ \
+ elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \
+ elevator_noop_merge_req, /* elevator_merge_req_fn */ \
+ })
+
+#define ELEVATOR_LINUS \
+((elevator_t) { \
+ 8192, /* read passovers */ \
+ 16384, /* write passovers */ \
+ \
+ elevator_linus_merge, /* elevator_merge_fn */ \
+ elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \
+ elevator_linus_merge_req, /* elevator_merge_req_fn */ \
+ })
+
+#endif
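
The IN_ORDER test is the heart of the sorting: requests order by device first,
then by ascending sector. A standalone sketch with mock types (only the two
fields the macro touches; demo only):

    #include <stdio.h>

    struct req { int rq_dev; long sector; }; /* mock request, demo only */

    #define IN_ORDER(s1,s2) \
        ((((s1)->rq_dev == (s2)->rq_dev && \
           (s1)->sector < (s2)->sector)) || \
         (s1)->rq_dev < (s2)->rq_dev)

    int main(void)
    {
        struct req a = { 3, 100 }, b = { 3, 200 }, c = { 2, 900 };
        printf("%d %d\n", IN_ORDER(&a, &b),  /* 1: same dev, ascending */
                          IN_ORDER(&a, &c)); /* 0: 'c' is a lower device */
        return 0;
    }
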
diff --git a/xen/include/xeno/errno.h b/xen/include/xeno/errno.h
new file mode 100644
index 0000000000..7cf599f4de
--- /dev/null
+++ b/xen/include/xeno/errno.h
@@ -0,0 +1,132 @@
+#ifndef _I386_ERRNO_H
+#define _I386_ERRNO_H
+
+#define EPERM 1 /* Operation not permitted */
+#define ENOENT 2 /* No such file or directory */
+#define ESRCH 3 /* No such process */
+#define EINTR 4 /* Interrupted system call */
+#define EIO 5 /* I/O error */
+#define ENXIO 6 /* No such device or address */
+#define E2BIG 7 /* Arg list too long */
+#define ENOEXEC 8 /* Exec format error */
+#define EBADF 9 /* Bad file number */
+#define ECHILD 10 /* No child processes */
+#define EAGAIN 11 /* Try again */
+#define ENOMEM 12 /* Out of memory */
+#define EACCES 13 /* Permission denied */
+#define EFAULT 14 /* Bad address */
+#define ENOTBLK 15 /* Block device required */
+#define EBUSY 16 /* Device or resource busy */
+#define EEXIST 17 /* File exists */
+#define EXDEV 18 /* Cross-device link */
+#define ENODEV 19 /* No such device */
+#define ENOTDIR 20 /* Not a directory */
+#define EISDIR 21 /* Is a directory */
+#define EINVAL 22 /* Invalid argument */
+#define ENFILE 23 /* File table overflow */
+#define EMFILE 24 /* Too many open files */
+#define ENOTTY 25 /* Not a typewriter */
+#define ETXTBSY 26 /* Text file busy */
+#define EFBIG 27 /* File too large */
+#define ENOSPC 28 /* No space left on device */
+#define ESPIPE 29 /* Illegal seek */
+#define EROFS 30 /* Read-only file system */
+#define EMLINK 31 /* Too many links */
+#define EPIPE 32 /* Broken pipe */
+#define EDOM 33 /* Math argument out of domain of func */
+#define ERANGE 34 /* Math result not representable */
+#define EDEADLK 35 /* Resource deadlock would occur */
+#define ENAMETOOLONG 36 /* File name too long */
+#define ENOLCK 37 /* No record locks available */
+#define ENOSYS 38 /* Function not implemented */
+#define ENOTEMPTY 39 /* Directory not empty */
+#define ELOOP 40 /* Too many symbolic links encountered */
+#define EWOULDBLOCK EAGAIN /* Operation would block */
+#define ENOMSG 42 /* No message of desired type */
+#define EIDRM 43 /* Identifier removed */
+#define ECHRNG 44 /* Channel number out of range */
+#define EL2NSYNC 45 /* Level 2 not synchronized */
+#define EL3HLT 46 /* Level 3 halted */
+#define EL3RST 47 /* Level 3 reset */
+#define ELNRNG 48 /* Link number out of range */
+#define EUNATCH 49 /* Protocol driver not attached */
+#define ENOCSI 50 /* No CSI structure available */
+#define EL2HLT 51 /* Level 2 halted */
+#define EBADE 52 /* Invalid exchange */
+#define EBADR 53 /* Invalid request descriptor */
+#define EXFULL 54 /* Exchange full */
+#define ENOANO 55 /* No anode */
+#define EBADRQC 56 /* Invalid request code */
+#define EBADSLT 57 /* Invalid slot */
+
+#define EDEADLOCK EDEADLK
+
+#define EBFONT 59 /* Bad font file format */
+#define ENOSTR 60 /* Device not a stream */
+#define ENODATA 61 /* No data available */
+#define ETIME 62 /* Timer expired */
+#define ENOSR 63 /* Out of streams resources */
+#define ENONET 64 /* Machine is not on the network */
+#define ENOPKG 65 /* Package not installed */
+#define EREMOTE 66 /* Object is remote */
+#define ENOLINK 67 /* Link has been severed */
+#define EADV 68 /* Advertise error */
+#define ESRMNT 69 /* Srmount error */
+#define ECOMM 70 /* Communication error on send */
+#define EPROTO 71 /* Protocol error */
+#define EMULTIHOP 72 /* Multihop attempted */
+#define EDOTDOT 73 /* RFS specific error */
+#define EBADMSG 74 /* Not a data message */
+#define EOVERFLOW 75 /* Value too large for defined data type */
+#define ENOTUNIQ 76 /* Name not unique on network */
+#define EBADFD 77 /* File descriptor in bad state */
+#define EREMCHG 78 /* Remote address changed */
+#define ELIBACC 79 /* Can not access a needed shared library */
+#define ELIBBAD 80 /* Accessing a corrupted shared library */
+#define ELIBSCN 81 /* .lib section in a.out corrupted */
+#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
+#define ELIBEXEC 83 /* Cannot exec a shared library directly */
+#define EILSEQ 84 /* Illegal byte sequence */
+#define ERESTART 85 /* Interrupted system call should be restarted */
+#define ESTRPIPE 86 /* Streams pipe error */
+#define EUSERS 87 /* Too many users */
+#define ENOTSOCK 88 /* Socket operation on non-socket */
+#define EDESTADDRREQ 89 /* Destination address required */
+#define EMSGSIZE 90 /* Message too long */
+#define EPROTOTYPE 91 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 92 /* Protocol not available */
+#define EPROTONOSUPPORT 93 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
+#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
+#define EPFNOSUPPORT 96 /* Protocol family not supported */
+#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
+#define EADDRINUSE 98 /* Address already in use */
+#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
+#define ENETDOWN 100 /* Network is down */
+#define ENETUNREACH 101 /* Network is unreachable */
+#define ENETRESET 102 /* Network dropped connection because of reset */
+#define ECONNABORTED 103 /* Software caused connection abort */
+#define ECONNRESET 104 /* Connection reset by peer */
+#define ENOBUFS 105 /* No buffer space available */
+#define EISCONN 106 /* Transport endpoint is already connected */
+#define ENOTCONN 107 /* Transport endpoint is not connected */
+#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
+#define ETOOMANYREFS 109 /* Too many references: cannot splice */
+#define ETIMEDOUT 110 /* Connection timed out */
+#define ECONNREFUSED 111 /* Connection refused */
+#define EHOSTDOWN 112 /* Host is down */
+#define EHOSTUNREACH 113 /* No route to host */
+#define EALREADY 114 /* Operation already in progress */
+#define EINPROGRESS 115 /* Operation now in progress */
+#define ESTALE 116 /* Stale NFS file handle */
+#define EUCLEAN 117 /* Structure needs cleaning */
+#define ENOTNAM 118 /* Not a XENIX named type file */
+#define ENAVAIL 119 /* No XENIX semaphores available */
+#define EISNAM 120 /* Is a named type file */
+#define EREMOTEIO 121 /* Remote I/O error */
+#define EDQUOT 122 /* Quota exceeded */
+
+#define ENOMEDIUM 123 /* No medium found */
+#define EMEDIUMTYPE 124 /* Wrong medium type */
+
+#endif
diff --git a/xen/include/xeno/etherdevice.h b/xen/include/xeno/etherdevice.h
new file mode 100644
index 0000000000..bac9b4d5ad
--- /dev/null
+++ b/xen/include/xeno/etherdevice.h
@@ -0,0 +1,68 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. NET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions for the Ethernet handlers.
+ *
+ * Version: @(#)eth.h 1.0.4 05/13/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * Relocated to include/linux where it belongs by Alan Cox
+ * <gw4pts@gw4pts.ampr.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * WARNING: This move may well be temporary. This file will get merged with others RSN.
+ *
+ */
+#ifndef _LINUX_ETHERDEVICE_H
+#define _LINUX_ETHERDEVICE_H
+
+#include <linux/if_ether.h>
+
+#ifdef __KERNEL__
+extern int eth_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, void *daddr,
+ void *saddr, unsigned len);
+extern int eth_rebuild_header(struct sk_buff *skb);
+extern unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev,
+ unsigned char * haddr);
+extern int eth_header_cache(struct neighbour *neigh,
+ struct hh_cache *hh);
+extern int eth_header_parse(struct sk_buff *skb,
+ unsigned char *haddr);
+extern struct net_device *init_etherdev(struct net_device *dev, int sizeof_priv);
+extern struct net_device *alloc_etherdev(int sizeof_priv);
+
+static inline void eth_copy_and_sum (struct sk_buff *dest, unsigned char *src, int len, int base)
+{
+ memcpy (dest->data, src, len);
+}
+
+/**
+ * is_valid_ether_addr - Determine if the given Ethernet address is valid
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not
+ * a multicast address, and is not FF:FF:FF:FF:FF:FF. The multicast
+ * and FF:FF:... tests are combined into the single test "!(addr[0]&1)".
+ *
+ * Return true if the address is valid.
+ */
+static inline int is_valid_ether_addr( u8 *addr )
+{
+ const char zaddr[6] = {0,};
+
+ return !(addr[0]&1) && memcmp( addr, zaddr, 6);
+}
+
+#endif
+
+#endif /* _LINUX_ETHERDEVICE_H */
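
The validity test above rejects the all-zero address and anything with the
group (multicast/broadcast) bit set in the first octet. A user-space copy of
the same logic (addresses are arbitrary examples):

    #include <stdio.h>
    #include <string.h>

    static int demo_valid_ether(const unsigned char *addr)
    {
        static const unsigned char zaddr[6];
        return !(addr[0] & 1) && memcmp(addr, zaddr, 6) != 0;
    }

    int main(void)
    {
        unsigned char ok[6]    = { 0x00, 0x16, 0x3e, 0x01, 0x02, 0x03 };
        unsigned char bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
        printf("%d %d\n", demo_valid_ether(ok),     /* 1 */
                          demo_valid_ether(bcast)); /* 0 */
        return 0;
    }
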
diff --git a/xen/include/xeno/ethtool.h b/xen/include/xeno/ethtool.h
new file mode 100644
index 0000000000..e672ac5887
--- /dev/null
+++ b/xen/include/xeno/ethtool.h
@@ -0,0 +1,361 @@
+/*
+ * ethtool.h: Defines for Linux ethtool.
+ *
+ * Copyright (C) 1998 David S. Miller (davem@redhat.com)
+ * Copyright 2001 Jeff Garzik <jgarzik@pobox.com>
+ * Portions Copyright 2001 Sun Microsystems (thockin@sun.com)
+ * Portions Copyright 2002 Intel (eli.kupermann@intel.com,
+ * christopher.leech@intel.com,
+ * scott.feldman@intel.com)
+ */
+
+#ifndef _LINUX_ETHTOOL_H
+#define _LINUX_ETHTOOL_H
+
+
+/* This should work for both 32 and 64 bit userland. */
+struct ethtool_cmd {
+ u32 cmd;
+ u32 supported; /* Features this interface supports */
+ u32 advertising; /* Features this interface advertises */
+ u16 speed; /* The forced speed, 10Mb, 100Mb, gigabit */
+ u8 duplex; /* Duplex, half or full */
+ u8 port; /* Which connector port */
+ u8 phy_address;
+ u8 transceiver; /* Which transceiver to use */
+ u8 autoneg; /* Enable or disable autonegotiation */
+ u32 maxtxpkt; /* Tx pkts before generating tx int */
+ u32 maxrxpkt; /* Rx pkts before generating rx int */
+ u32 reserved[4];
+};
+
+#define ETHTOOL_BUSINFO_LEN 32
+/* these strings are set to whatever the driver author decides... */
+struct ethtool_drvinfo {
+ u32 cmd;
+ char driver[32]; /* driver short name, "tulip", "eepro100" */
+ char version[32]; /* driver version string */
+ char fw_version[32]; /* firmware version string, if applicable */
+ char bus_info[ETHTOOL_BUSINFO_LEN]; /* Bus info for this IF. */
+ /* For PCI devices, use pci_dev->slot_name. */
+ char reserved1[32];
+ char reserved2[16];
+ u32 n_stats; /* number of u64's from ETHTOOL_GSTATS */
+ u32 testinfo_len;
+ u32 eedump_len; /* Size of data from ETHTOOL_GEEPROM (bytes) */
+ u32 regdump_len; /* Size of data from ETHTOOL_GREGS (bytes) */
+};
+
+#define SOPASS_MAX 6
+/* wake-on-lan settings */
+struct ethtool_wolinfo {
+ u32 cmd;
+ u32 supported;
+ u32 wolopts;
+ u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */
+};
+
+/* for passing single values */
+struct ethtool_value {
+ u32 cmd;
+ u32 data;
+};
+
+/* for passing big chunks of data */
+struct ethtool_regs {
+ u32 cmd;
+ u32 version; /* driver-specific, indicates different chips/revs */
+ u32 len; /* bytes */
+ u8 data[0];
+};
+
+/* for passing EEPROM chunks */
+struct ethtool_eeprom {
+ u32 cmd;
+ u32 magic;
+ u32 offset; /* in bytes */
+ u32 len; /* in bytes */
+ u8 data[0];
+};
+
+/* for configuring coalescing parameters of chip */
+struct ethtool_coalesce {
+ u32 cmd; /* ETHTOOL_{G,S}COALESCE */
+
+ /* How many usecs to delay an RX interrupt after
+ * a packet arrives. If 0, only rx_max_coalesced_frames
+ * is used.
+ */
+ u32 rx_coalesce_usecs;
+
+ /* How many packets to delay an RX interrupt after
+ * a packet arrives. If 0, only rx_coalesce_usecs is
+ * used. It is illegal to set both usecs and max frames
+ * to zero as this would cause RX interrupts to never be
+ * generated.
+ */
+ u32 rx_max_coalesced_frames;
+
+ /* Same as above two parameters, except that these values
+ * apply while an IRQ is being serviced by the host. Not
+ * all cards support this feature and the values are ignored
+ * in that case.
+ */
+ u32 rx_coalesce_usecs_irq;
+ u32 rx_max_coalesced_frames_irq;
+
+ /* How many usecs to delay a TX interrupt after
+ * a packet is sent. If 0, only tx_max_coalesced_frames
+ * is used.
+ */
+ u32 tx_coalesce_usecs;
+
+ /* How many packets to delay a TX interrupt after
+ * a packet is sent. If 0, only tx_coalesce_usecs is
+ * used. It is illegal to set both usecs and max frames
+ * to zero as this would cause TX interrupts to never be
+ * generated.
+ */
+ u32 tx_max_coalesced_frames;
+
+ /* Same as above two parameters, except that these values
+ * apply while an IRQ is being serviced by the host. Not
+ * all cards support this feature and the values are ignored
+ * in that case.
+ */
+ u32 tx_coalesce_usecs_irq;
+ u32 tx_max_coalesced_frames_irq;
+
+ /* How many usecs to delay in-memory statistics
+ * block updates. Some drivers do not have an in-memory
+ * statistic block, and in such cases this value is ignored.
+ * This value must not be zero.
+ */
+ u32 stats_block_coalesce_usecs;
+
+ /* Adaptive RX/TX coalescing is an algorithm implemented by
+ * some drivers to improve latency under low packet rates and
+ * improve throughput under high packet rates. Some drivers
+ * only implement one of RX or TX adaptive coalescing. Anything
+ * not implemented by the driver causes these values to be
+ * silently ignored.
+ */
+ u32 use_adaptive_rx_coalesce;
+ u32 use_adaptive_tx_coalesce;
+
+ /* When the packet rate (measured in packets per second)
+ * is below pkt_rate_low, the {rx,tx}_*_low parameters are
+ * used.
+ */
+ u32 pkt_rate_low;
+ u32 rx_coalesce_usecs_low;
+ u32 rx_max_coalesced_frames_low;
+ u32 tx_coalesce_usecs_low;
+ u32 tx_max_coalesced_frames_low;
+
+ /* When the packet rate is below pkt_rate_high but above
+ * pkt_rate_low (both measured in packets per second) the
+ * normal {rx,tx}_* coalescing parameters are used.
+ */
+
+ /* When the packet rate (measured in packets per second)
+ * is above pkt_rate_high, the {rx,tx}_*_high parameters are
+ * used.
+ */
+ u32 pkt_rate_high;
+ u32 rx_coalesce_usecs_high;
+ u32 rx_max_coalesced_frames_high;
+ u32 tx_coalesce_usecs_high;
+ u32 tx_max_coalesced_frames_high;
+
+ /* How often to do adaptive coalescing packet rate sampling,
+ * measured in seconds. Must not be zero.
+ */
+ u32 rate_sample_interval;
+};
+
+/* for configuring RX/TX ring parameters */
+struct ethtool_ringparam {
+ u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */
+
+ /* Read only attributes. These indicate the maximum number
+ * of pending RX/TX ring entries the driver will allow the
+ * user to set.
+ */
+ u32 rx_max_pending;
+ u32 rx_mini_max_pending;
+ u32 rx_jumbo_max_pending;
+ u32 tx_max_pending;
+
+ /* Values changeable by the user. The valid values are
+ * in the range 1 to the "*_max_pending" counterpart above.
+ */
+ u32 rx_pending;
+ u32 rx_mini_pending;
+ u32 rx_jumbo_pending;
+ u32 tx_pending;
+};
+
+/* for configuring link flow control parameters */
+struct ethtool_pauseparam {
+ u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */
+
+ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg
+ * being true) the user may set 'autoneg' here non-zero to have the
+ * pause parameters be auto-negotiated too. In such a case, the
+ * {rx,tx}_pause values below determine what capabilities are
+ * advertised.
+ *
+ * If 'autoneg' is zero or the link is not being auto-negotiated,
+ * then {rx,tx}_pause force the driver to use/not-use pause
+ * flow control.
+ */
+ u32 autoneg;
+ u32 rx_pause;
+ u32 tx_pause;
+};
+
+#define ETH_GSTRING_LEN 32
+enum ethtool_stringset {
+ ETH_SS_TEST = 0,
+ ETH_SS_STATS,
+};
+
+/* for passing string sets for data tagging */
+struct ethtool_gstrings {
+ u32 cmd; /* ETHTOOL_GSTRINGS */
+ u32 string_set; /* string set id, e.g. ETH_SS_TEST, etc. */
+ u32 len; /* number of strings in the string set */
+ u8 data[0];
+};
+
+enum ethtool_test_flags {
+ ETH_TEST_FL_OFFLINE = (1 << 0), /* online / offline */
+ ETH_TEST_FL_FAILED = (1 << 1), /* test passed / failed */
+};
+
+/* for requesting NIC test and getting results*/
+struct ethtool_test {
+ u32 cmd; /* ETHTOOL_TEST */
+ u32 flags; /* ETH_TEST_FL_xxx */
+ u32 reserved;
+ u32 len; /* result length, in number of u64 elements */
+ u64 data[0];
+};
+
+/* for dumping NIC-specific statistics */
+struct ethtool_stats {
+ u32 cmd; /* ETHTOOL_GSTATS */
+ u32 n_stats; /* number of u64's being returned */
+ u64 data[0];
+};
+
+/* CMDs currently supported */
+#define ETHTOOL_GSET 0x00000001 /* Get settings. */
+#define ETHTOOL_SSET 0x00000002 /* Set settings, privileged. */
+#define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */
+#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers, privileged. */
+#define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */
+#define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options, priv. */
+#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */
+#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */
+#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv. */
+#define ETHTOOL_GLINK 0x0000000a /* Get link status (ethtool_value) */
+#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */
+#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data, priv. */
+#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */
+#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config, priv. */
+#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */
+#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */
+#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */
+#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters, priv. */
+#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable
+ * (ethtool_value) */
+#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable
+ * (ethtool_value), priv. */
+#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */
+#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */
+#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */
+#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */
+
+/* compatibility with older code */
+#define SPARC_ETH_GSET ETHTOOL_GSET
+#define SPARC_ETH_SSET ETHTOOL_SSET
+
+/* Indicates what features are supported by the interface. */
+#define SUPPORTED_10baseT_Half (1 << 0)
+#define SUPPORTED_10baseT_Full (1 << 1)
+#define SUPPORTED_100baseT_Half (1 << 2)
+#define SUPPORTED_100baseT_Full (1 << 3)
+#define SUPPORTED_1000baseT_Half (1 << 4)
+#define SUPPORTED_1000baseT_Full (1 << 5)
+#define SUPPORTED_Autoneg (1 << 6)
+#define SUPPORTED_TP (1 << 7)
+#define SUPPORTED_AUI (1 << 8)
+#define SUPPORTED_MII (1 << 9)
+#define SUPPORTED_FIBRE (1 << 10)
+#define SUPPORTED_BNC (1 << 11)
+
+/* Indicates what features are advertised by the interface. */
+#define ADVERTISED_10baseT_Half (1 << 0)
+#define ADVERTISED_10baseT_Full (1 << 1)
+#define ADVERTISED_100baseT_Half (1 << 2)
+#define ADVERTISED_100baseT_Full (1 << 3)
+#define ADVERTISED_1000baseT_Half (1 << 4)
+#define ADVERTISED_1000baseT_Full (1 << 5)
+#define ADVERTISED_Autoneg (1 << 6)
+#define ADVERTISED_TP (1 << 7)
+#define ADVERTISED_AUI (1 << 8)
+#define ADVERTISED_MII (1 << 9)
+#define ADVERTISED_FIBRE (1 << 10)
+#define ADVERTISED_BNC (1 << 11)
+
+/* The following are all involved in forcing a particular link
+ * mode for the device when setting things. When getting the
+ * device's settings, these indicate the current mode and whether
+ * it was forced up into this mode or autonegotiated.
+ */
+
+/* The forced speed, 10Mb, 100Mb, gigabit. */
+#define SPEED_10 10
+#define SPEED_100 100
+#define SPEED_1000 1000
+
+/* Duplex, half or full. */
+#define DUPLEX_HALF 0x00
+#define DUPLEX_FULL 0x01
+
+/* Which connector port. */
+#define PORT_TP 0x00
+#define PORT_AUI 0x01
+#define PORT_MII 0x02
+#define PORT_FIBRE 0x03
+#define PORT_BNC 0x04
+
+/* Which transceiver to use. */
+#define XCVR_INTERNAL 0x00
+#define XCVR_EXTERNAL 0x01
+#define XCVR_DUMMY1 0x02
+#define XCVR_DUMMY2 0x03
+#define XCVR_DUMMY3 0x04
+
+/* Enable or disable autonegotiation. If this is set to enable,
+ * the forced link modes above are completely ignored.
+ */
+#define AUTONEG_DISABLE 0x00
+#define AUTONEG_ENABLE 0x01
+
+/* Wake-On-Lan options. */
+#define WAKE_PHY (1 << 0)
+#define WAKE_UCAST (1 << 1)
+#define WAKE_MCAST (1 << 2)
+#define WAKE_BCAST (1 << 3)
+#define WAKE_ARP (1 << 4)
+#define WAKE_MAGIC (1 << 5)
+#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
+
+#endif /* _LINUX_ETHTOOL_H */
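
On Linux these structures are driven from user space through the SIOCETHTOOL
ioctl, with ifr_data pointing at the command block. A sketch of a GSET query
(error handling trimmed; "eth0" is an assumed interface name, and the structs
and constants come from the system's <linux/ethtool.h>):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h> /* SIOCETHTOOL */

    int main(void)
    {
        struct ethtool_cmd ecmd;
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
            return 1;
        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
        memset(&ecmd, 0, sizeof(ecmd));
        ecmd.cmd = ETHTOOL_GSET;
        ifr.ifr_data = (char *)&ecmd;

        if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
            printf("speed %u, %s duplex\n", ecmd.speed,
                   ecmd.duplex == DUPLEX_FULL ? "full" : "half");
        close(fd);
        return 0;
    }
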
diff --git a/xen/include/xeno/event.h b/xen/include/xeno/event.h
new file mode 100644
index 0000000000..fdb9fed24d
--- /dev/null
+++ b/xen/include/xeno/event.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ * event.h
+ *
+ * A nice interface for passing asynchronous events to guest OSes.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/bitops.h>
+
+#ifdef CONFIG_SMP
+
+/*
+ * mark_guest_event:
+ * @p: Domain to which event should be passed
+ * @event: Event number
+ * RETURNS: "Bitmask" of CPU on which process is currently running
+ *
+ * Idea is that caller may loop on task_list, looking for domains
+ * to pass events to (using this function). The caller accumulates the
+ * bits returned by this function (ORing them together) then calls
+ * guest_event_notify().
+ *
+ * Guest_events are per-domain events passed directly to the guest OS
+ * in ring 1.
+ */
+static inline unsigned long mark_guest_event(struct task_struct *p, int event)
+{
+ set_bit(event, &p->shared_info->events);
+
+ /*
+ * No need for the runqueue_lock! The check below does not race
+ * with the setting of has_cpu, because that is set with runqueue_lock
+ * held. The lock must be released before hypervisor exit (and so
+ * a write barrier executed). And, just before hypervisor exit,
+ * outstanding events are checked. So bit is certainly set early enough.
+ */
+ smp_mb();
+ if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+ reschedule(p);
+ return p->has_cpu ? (1 << p->processor) : 0;
+}
+
+/* As above, but hyp_events are handled within the hypervisor. */
+static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
+{
+ set_bit(event, &p->hyp_events);
+ smp_mb();
+ if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+ reschedule(p);
+ return p->has_cpu ? (1 << p->processor) : 0;
+}
+
+/* Notify the given set of CPUs that guest events may be outstanding. */
+static inline void guest_event_notify(unsigned long cpu_mask)
+{
+ cpu_mask &= ~(1 << smp_processor_id());
+ if ( cpu_mask != 0 ) smp_send_event_check_mask(cpu_mask);
+}
+
+#else
+
+static inline unsigned long mark_guest_event(struct task_struct *p, int event)
+{
+ set_bit(event, &p->shared_info->events);
+ if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+ reschedule(p);
+ return 0;
+}
+
+static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
+{
+ set_bit(event, &p->hyp_events);
+ if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+ reschedule(p);
+ return 0;
+}
+
+#define guest_event_notify(_mask) ((void)0)
+
+#endif
+
+/* Notify hypervisor events in the same way as for guest OS events. */
+#define hyp_event_notify(_mask) guest_event_notify(_mask)
+
+/* Clear a guest-OS event from a per-domain mask. */
+static inline void clear_guest_event(struct task_struct *p, int event)
+{
+ clear_bit(event, &p->shared_info->events);
+}
+
+/* Clear a hypervisor event from a per-domain mask. */
+static inline void clear_hyp_event(struct task_struct *p, int event)
+{
+ clear_bit(event, &p->hyp_events);
+}
+
+/* Called on return from (architecture-dependent) entry.S. */
+void do_hyp_events(void);
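
Putting the SMP comments above together, the intended calling pattern
accumulates the returned CPU masks and issues the cross-CPU kick once at the
end. A sketch only (the domain iteration and event number are illustrative,
not code from this tree):

    unsigned long cpu_mask = 0;

    /* ... for each domain 'p' that should receive the event ... */
    cpu_mask |= mark_guest_event(p, event_nr); /* event_nr: hypothetical */

    /* One IPI pass for the whole batch, not one per domain: */
    guest_event_notify(cpu_mask);
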
diff --git a/xen/include/xeno/genhd.h b/xen/include/xeno/genhd.h
new file mode 100644
index 0000000000..58a1734a56
--- /dev/null
+++ b/xen/include/xeno/genhd.h
@@ -0,0 +1,313 @@
+#ifndef _LINUX_GENHD_H
+#define _LINUX_GENHD_H
+
+/*
+ * genhd.h Copyright (C) 1992 Drew Eckhardt
+ * Generic hard disk header file by
+ * Drew Eckhardt
+ *
+ * <drew@colorado.edu>
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/major.h>
+
+enum {
+/* These three have identical behaviour; use the second one if DOS fdisk gets
+ confused about extended/logical partitions starting past cylinder 1023. */
+ DOS_EXTENDED_PARTITION = 5,
+ LINUX_EXTENDED_PARTITION = 0x85,
+ WIN98_EXTENDED_PARTITION = 0x0f,
+
+ LINUX_SWAP_PARTITION = 0x82,
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+
+ SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION,
+
+ DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */
+ EZD_PARTITION = 0x55, /* EZ-DRIVE */
+ DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */
+ DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */
+
+ FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */
+ OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */
+ NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */
+ BSDI_PARTITION = 0xb7, /* BSDI Partition ID */
+/* Ours is not to wonder why.. */
+ BSD_PARTITION = FREEBSD_PARTITION,
+ MINIX_PARTITION = 0x81, /* Minix Partition ID */
+ PLAN9_PARTITION = 0x39, /* Plan 9 Partition ID */
+ UNIXWARE_PARTITION = 0x63, /* Partition ID, same as */
+ /* GNU_HURD and SCO Unix */
+};
+
+struct partition {
+ unsigned char boot_ind; /* 0x80 - active */
+ unsigned char head; /* starting head */
+ unsigned char sector; /* starting sector */
+ unsigned char cyl; /* starting cylinder */
+ unsigned char sys_ind; /* What partition type */
+ unsigned char end_head; /* end head */
+ unsigned char end_sector; /* end sector */
+ unsigned char end_cyl; /* end cylinder */
+ unsigned int start_sect; /* starting sector counting from 0 */
+ unsigned int nr_sects; /* nr of sectors in partition */
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+/*# include <linux/devfs_fs_kernel.h>*/
+
+struct hd_struct {
+ unsigned long start_sect;
+ unsigned long nr_sects;
+ /*devfs_handle_t de;*/ /* primary (master) devfs entry */
+ int number; /* stupid old code wastes space */
+
+ /* Performance stats: */
+ unsigned int ios_in_flight;
+ unsigned int io_ticks;
+ unsigned int last_idle_time;
+ unsigned int last_queue_change;
+ unsigned int aveq;
+
+ unsigned int rd_ios;
+ unsigned int rd_merges;
+ unsigned int rd_ticks;
+ unsigned int rd_sectors;
+ unsigned int wr_ios;
+ unsigned int wr_merges;
+ unsigned int wr_ticks;
+ unsigned int wr_sectors;
+};
+
+#define GENHD_FL_REMOVABLE 1
+
+struct gendisk {
+ int major; /* major number of driver */
+ const char *major_name; /* name of major driver */
+ int minor_shift; /* number of times minor is shifted to
+ get real minor */
+ int max_p; /* maximum partitions per device */
+
+ struct hd_struct *part; /* [indexed by minor] */
+ int *sizes; /* [idem], device size in blocks */
+ int nr_real; /* number of real devices */
+
+ void *real_devices; /* internal use */
+ struct gendisk *next;
+ struct block_device_operations *fops;
+
+ /*devfs_handle_t *de_arr;*/ /* one per physical disc */
+ char *flags; /* one per physical disc */
+};
+
+/* drivers/block/genhd.c */
+extern struct gendisk *gendisk_head;
+
+extern void add_gendisk(struct gendisk *gp);
+extern void del_gendisk(struct gendisk *gp);
+extern struct gendisk *get_gendisk(kdev_t dev);
+extern int walk_gendisk(int (*walk)(struct gendisk *, void *), void *);
+
+#endif /* __KERNEL__ */
+
+#ifdef CONFIG_SOLARIS_X86_PARTITION
+
+#define SOLARIS_X86_NUMSLICE 8
+#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL)
+
+struct solaris_x86_slice {
+ ushort s_tag; /* ID tag of partition */
+ ushort s_flag; /* permission flags */
+ unsigned int s_start; /* start sector no of partition */
+ unsigned int s_size; /* # of blocks in partition */
+};
+
+struct solaris_x86_vtoc {
+ unsigned int v_bootinfo[3]; /* info needed by mboot (unsupported) */
+ unsigned int v_sanity; /* to verify vtoc sanity */
+ unsigned int v_version; /* layout version */
+ char v_volume[8]; /* volume name */
+ ushort v_sectorsz; /* sector size in bytes */
+ ushort v_nparts; /* number of partitions */
+ unsigned int v_reserved[10]; /* free space */
+ struct solaris_x86_slice
+ v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */
+ unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */
+ char v_asciilabel[128]; /* for compatibility */
+};
+
+#endif /* CONFIG_SOLARIS_X86_PARTITION */
+
+#ifdef CONFIG_BSD_DISKLABEL
+/*
+ * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il>
+ * updated by Marc Espie <Marc.Espie@openbsd.org>
+ */
+
+/* check against BSD src/sys/sys/disklabel.h for consistency */
+
+#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */
+#define BSD_MAXPARTITIONS 8
+#define OPENBSD_MAXPARTITIONS 16
+#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */
+struct bsd_disklabel {
+ __u32 d_magic; /* the magic number */
+ __s16 d_type; /* drive type */
+ __s16 d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ char d_packname[16]; /* pack identifier */
+ __u32 d_secsize; /* # of bytes per sector */
+ __u32 d_nsectors; /* # of data sectors per track */
+ __u32 d_ntracks; /* # of tracks per cylinder */
+ __u32 d_ncylinders; /* # of data cylinders per unit */
+ __u32 d_secpercyl; /* # of data sectors per cylinder */
+ __u32 d_secperunit; /* # of data sectors per unit */
+ __u16 d_sparespertrack; /* # of spare sectors per track */
+ __u16 d_sparespercyl; /* # of spare sectors per cylinder */
+ __u32 d_acylinders; /* # of alt. cylinders per unit */
+ __u16 d_rpm; /* rotational speed */
+ __u16 d_interleave; /* hardware sector interleave */
+ __u16 d_trackskew; /* sector 0 skew, per track */
+ __u16 d_cylskew; /* sector 0 skew, per cylinder */
+ __u32 d_headswitch; /* head switch time, usec */
+ __u32 d_trkseek; /* track-to-track seek, usec */
+ __u32 d_flags; /* generic flags */
+#define NDDATA 5
+ __u32 d_drivedata[NDDATA]; /* drive-type specific information */
+#define NSPARE 5
+ __u32 d_spare[NSPARE]; /* reserved for future use */
+ __u32 d_magic2; /* the magic number (again) */
+ __u16 d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ __u16 d_npartitions; /* number of partitions in following */
+ __u32 d_bbsize; /* size of boot area at sn0, bytes */
+ __u32 d_sbsize; /* max size of fs superblock, bytes */
+ struct bsd_partition { /* the partition table */
+ __u32 p_size; /* number of sectors in partition */
+ __u32 p_offset; /* starting sector */
+ __u32 p_fsize; /* filesystem basic fragment size */
+ __u8 p_fstype; /* filesystem type, see below */
+ __u8 p_frag; /* filesystem fragments per block */
+ __u16 p_cpg; /* filesystem cylinders per group */
+ } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */
+};
+
+#endif /* CONFIG_BSD_DISKLABEL */
+
+#ifdef CONFIG_UNIXWARE_DISKLABEL
+/*
+ * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
+ * and Krzysztof G. Baranowski <kgb@knm.org.pl>
+ */
+
+#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */
+#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */
+#define UNIXWARE_NUMSLICE 16
+#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */
+
+struct unixware_slice {
+ __u16 s_label; /* label */
+ __u16 s_flags; /* permission flags */
+ __u32 start_sect; /* starting sector */
+ __u32 nr_sects; /* number of sectors in slice */
+};
+
+struct unixware_disklabel {
+ __u32 d_type; /* drive type */
+ __u32 d_magic; /* the magic number */
+ __u32 d_version; /* version number */
+ char d_serial[12]; /* serial number of the device */
+ __u32 d_ncylinders; /* # of data cylinders per device */
+ __u32 d_ntracks; /* # of tracks per cylinder */
+ __u32 d_nsectors; /* # of data sectors per track */
+ __u32 d_secsize; /* # of bytes per sector */
+ __u32 d_part_start; /* # of first sector of this partition */
+ __u32 d_unknown1[12]; /* ? */
+ __u32 d_alt_tbl; /* byte offset of alternate table */
+ __u32 d_alt_len; /* byte length of alternate table */
+ __u32 d_phys_cyl; /* # of physical cylinders per device */
+ __u32 d_phys_trk; /* # of physical tracks per cylinder */
+ __u32 d_phys_sec; /* # of physical sectors per track */
+ __u32 d_phys_bytes; /* # of physical bytes per sector */
+ __u32 d_unknown2; /* ? */
+ __u32 d_unknown3; /* ? */
+ __u32 d_pad[8]; /* pad */
+
+ struct unixware_vtoc {
+ __u32 v_magic; /* the magic number */
+ __u32 v_version; /* version number */
+ char v_name[8]; /* volume name */
+ __u16 v_nslices; /* # of slices */
+ __u16 v_unknown1; /* ? */
+ __u32 v_reserved[10]; /* reserved */
+ struct unixware_slice
+ v_slice[UNIXWARE_NUMSLICE]; /* slice headers */
+ } vtoc;
+
+}; /* 408 */
+
+#endif /* CONFIG_UNIXWARE_DISKLABEL */
+
+#ifdef CONFIG_MINIX_SUBPARTITION
+# define MINIX_NR_SUBPARTITIONS 4
+#endif /* CONFIG_MINIX_SUBPARTITION */
+
+#ifdef __KERNEL__
+
+char *disk_name (struct gendisk *hd, int minor, char *buf);
+
+/*
+ * disk_round_stats is used to round off the IO statistics for a disk
+ * for a complete clock tick.
+ */
+void disk_round_stats(struct hd_struct *hd);
+
+/*
+ * Account for the completion of an IO request (used by drivers which
+ * bypass the normal end_request processing)
+ */
+struct request;
+void req_finished_io(struct request *);
+
+#ifdef DEVFS_MUST_DIE
+extern void devfs_register_partitions (struct gendisk *dev, int minor,
+ int unregister);
+#endif
+
+
+
+/*
+ * FIXME: this should use genhd->minor_shift, but that is slow to look up.
+ */
+static inline unsigned int disk_index (kdev_t dev)
+{
+ int major = MAJOR(dev);
+ int minor = MINOR(dev);
+ unsigned int index;
+
+ switch (major) {
+ case DAC960_MAJOR+0:
+ index = (minor & 0x00f8) >> 3;
+ break;
+ case SCSI_DISK0_MAJOR:
+ index = (minor & 0x00f0) >> 4;
+ break;
+ case IDE0_MAJOR: /* same as HD_MAJOR */
+ case XT_DISK_MAJOR:
+ index = (minor & 0x0040) >> 6;
+ break;
+ case IDE1_MAJOR:
+ index = ((minor & 0x0040) >> 6) + 2;
+ break;
+ default:
+ return 0;
+ }
+ return index;
+}
+
+#endif
+
+#endif
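
struct partition above is the on-disk layout of a DOS MBR entry: four 16-byte
slots starting at byte 446, terminated by the 0x55AA signature. A user-space
sketch that walks a disk image given on the command line (struct redeclared
locally so the demo is self-contained; fields are little-endian on disk, which
reads correctly on x86):

    #include <stdio.h>

    struct mbr_part { /* same layout as struct partition above */
        unsigned char boot_ind, head, sector, cyl;
        unsigned char sys_ind, end_head, end_sector, end_cyl;
        unsigned int  start_sect, nr_sects;
    } __attribute__((packed));

    int main(int argc, char **argv)
    {
        unsigned char mbr[512];
        FILE *f;
        int i;

        if (argc < 2 || !(f = fopen(argv[1], "rb")))
            return 1;
        if (fread(mbr, 1, 512, f) != 512 ||
            mbr[510] != 0x55 || mbr[511] != 0xaa)
            return 1; /* short read or missing MBR signature */

        for (i = 0; i < 4; i++) {
            struct mbr_part *p = (struct mbr_part *)(mbr + 446 + i * 16);
            if (p->sys_ind)
                printf("part %d: type %#x start %u sectors %u\n",
                       i, p->sys_ind, p->start_sect, p->nr_sects);
        }
        fclose(f);
        return 0;
    }
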
diff --git a/xen/include/xeno/hdreg.h b/xen/include/xeno/hdreg.h
new file mode 100644
index 0000000000..703b750110
--- /dev/null
+++ b/xen/include/xeno/hdreg.h
@@ -0,0 +1,662 @@
+#ifndef _LINUX_HDREG_H
+#define _LINUX_HDREG_H
+
+/*
+ * This file contains some defines for the AT-hd-controller.
+ * Various sources.
+ */
+
+#define HD_IRQ 14 /* the standard disk interrupt */
+
+/* ide.c has its own port definitions in "ide.h" */
+
+/* Hd controller regs. Ref: IBM AT Bios-listing */
+#define HD_DATA 0x1f0 /* _CTL when writing */
+#define HD_ERROR 0x1f1 /* see err-bits */
+#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */
+#define HD_SECTOR 0x1f3 /* starting sector */
+#define HD_LCYL 0x1f4 /* starting cylinder */
+#define HD_HCYL 0x1f5 /* high byte of starting cyl */
+#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */
+#define HD_STATUS 0x1f7 /* see status-bits */
+#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */
+#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */
+#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */
+
+#define HD_CMD 0x3f6 /* used for resets */
+#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */
+
+/* remainder is shared between hd.c, ide.c, ide-cd.c, and the hdparm utility */
+
+/* Bits of HD_STATUS */
+#define ERR_STAT 0x01
+#define INDEX_STAT 0x02
+#define ECC_STAT 0x04 /* Corrected error */
+#define DRQ_STAT 0x08
+#define SEEK_STAT 0x10
+#define WRERR_STAT 0x20
+#define READY_STAT 0x40
+#define BUSY_STAT 0x80
+
+/* Bits for HD_ERROR */
+#define MARK_ERR 0x01 /* Bad address mark */
+#define TRK0_ERR 0x02 /* couldn't find track 0 */
+#define ABRT_ERR 0x04 /* Command aborted */
+#define MCR_ERR 0x08 /* media change request */
+#define ID_ERR 0x10 /* ID field not found */
+#define MC_ERR 0x20 /* media changed */
+#define ECC_ERR 0x40 /* Uncorrectable ECC error */
+#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */
+#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */
+
+/*
+ * Command Header sizes for IOCTL commands
+ * HDIO_DRIVE_CMD, HDIO_DRIVE_TASK, and HDIO_DRIVE_TASKFILE
+ */
+
+#if 0
+#include <asm/hdreg.h>
+typedef ide_ioreg_t task_ioreg_t;
+#else
+typedef unsigned char task_ioreg_t;
+#endif
+
+#define HDIO_DRIVE_CMD_HDR_SIZE (4*sizeof(task_ioreg_t))
+#define HDIO_DRIVE_TASK_HDR_SIZE (8*sizeof(task_ioreg_t))
+#define HDIO_DRIVE_HOB_HDR_SIZE (8*sizeof(task_ioreg_t))
+
+#define IDE_DRIVE_TASK_INVALID -1
+#define IDE_DRIVE_TASK_NO_DATA 0
+#define IDE_DRIVE_TASK_SET_XFER 1
+
+#define IDE_DRIVE_TASK_IN 2
+
+#define IDE_DRIVE_TASK_OUT 3
+#define IDE_DRIVE_TASK_RAW_WRITE 4
+
+struct hd_drive_cmd_hdr {
+ task_ioreg_t command;
+ task_ioreg_t sector_number;
+ task_ioreg_t feature;
+ task_ioreg_t sector_count;
+};
+
+typedef struct hd_drive_task_hdr {
+ task_ioreg_t data;
+ task_ioreg_t feature;
+ task_ioreg_t sector_count;
+ task_ioreg_t sector_number;
+ task_ioreg_t low_cylinder;
+ task_ioreg_t high_cylinder;
+ task_ioreg_t device_head;
+ task_ioreg_t command;
+} task_struct_t;
+
+typedef struct hd_drive_hob_hdr {
+ task_ioreg_t data;
+ task_ioreg_t feature;
+ task_ioreg_t sector_count;
+ task_ioreg_t sector_number;
+ task_ioreg_t low_cylinder;
+ task_ioreg_t high_cylinder;
+ task_ioreg_t device_head;
+ task_ioreg_t control;
+} hob_struct_t;
+
+typedef union ide_reg_valid_s {
+ unsigned all : 16;
+ struct {
+ unsigned data : 1;
+ unsigned error_feature : 1;
+ unsigned sector : 1;
+ unsigned nsector : 1;
+ unsigned lcyl : 1;
+ unsigned hcyl : 1;
+ unsigned select : 1;
+ unsigned status_command : 1;
+
+ unsigned data_hob : 1;
+ unsigned error_feature_hob : 1;
+ unsigned sector_hob : 1;
+ unsigned nsector_hob : 1;
+ unsigned lcyl_hob : 1;
+ unsigned hcyl_hob : 1;
+ unsigned select_hob : 1;
+ unsigned control_hob : 1;
+ } b;
+} ide_reg_valid_t;
+
+/*
+ * Define standard taskfile in/out register
+ */
+#define IDE_TASKFILE_STD_OUT_FLAGS 0xFE
+#define IDE_TASKFILE_STD_IN_FLAGS 0xFE
+#define IDE_HOB_STD_OUT_FLAGS 0xC0
+#define IDE_HOB_STD_IN_FLAGS 0xC0
+
+typedef struct ide_task_request_s {
+ task_ioreg_t io_ports[8];
+ task_ioreg_t hob_ports[8];
+ ide_reg_valid_t out_flags;
+ ide_reg_valid_t in_flags;
+ int data_phase;
+ int req_cmd;
+ unsigned long out_size;
+ unsigned long in_size;
+} ide_task_request_t;
+
+typedef struct ide_ioctl_request_s {
+ ide_task_request_t *task_request;
+ unsigned char *out_buffer;
+ unsigned char *in_buffer;
+} ide_ioctl_request_t;
+
+#define TASKFILE_INVALID 0x7fff
+#define TASKFILE_48 0x8000
+
+#define TASKFILE_NO_DATA 0x0000
+
+#define TASKFILE_IN 0x0001
+#define TASKFILE_MULTI_IN 0x0002
+
+#define TASKFILE_OUT 0x0004
+#define TASKFILE_MULTI_OUT 0x0008
+#define TASKFILE_IN_OUT 0x0010
+
+#define TASKFILE_IN_DMA 0x0020
+#define TASKFILE_OUT_DMA 0x0040
+#define TASKFILE_IN_DMAQ 0x0080
+#define TASKFILE_OUT_DMAQ 0x0100
+
+#define TASKFILE_P_IN 0x0200
+#define TASKFILE_P_OUT 0x0400
+#define TASKFILE_P_IN_DMA 0x0800
+#define TASKFILE_P_OUT_DMA 0x1000
+#define TASKFILE_P_IN_DMAQ 0x2000
+#define TASKFILE_P_OUT_DMAQ 0x4000
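+
+/*
+ * Illustrative sketch (an assumption, not part of the original header):
+ * filling in an ide_task_request_t for a no-data command such as
+ * WIN_STANDBYNOW1.  io_ports[] is assumed to follow the struct
+ * hd_drive_task_hdr layout above, so index 7 is the command register.
+ * 'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    ide_task_request_t req;
+
+    memset(&req, 0, sizeof(req));
+    req.io_ports[7]   = WIN_STANDBYNOW1;            /* command byte */
+    req.out_flags.all = IDE_TASKFILE_STD_OUT_FLAGS; /* regs to write */
+    req.data_phase    = TASKFILE_NO_DATA;
+    req.req_cmd       = IDE_DRIVE_TASK_NO_DATA;
+    ioctl(fd, HDIO_DRIVE_TASKFILE, &req);
+#endif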
+
+/* ATA/ATAPI Commands pre T13 Spec */
+#define WIN_NOP 0x00
+#define CFA_REQ_EXT_ERROR_CODE 0x03 /* CFA Request Extended Error Code */
+#define WIN_SRST 0x08 /* ATAPI soft reset command */
+#define WIN_DEVICE_RESET 0x08
+#define WIN_RESTORE 0x10
+#define WIN_READ 0x20 /* 28-Bit */
+#define WIN_READ_EXT 0x24 /* 48-Bit */
+#define WIN_READDMA_EXT 0x25 /* 48-Bit */
+#define WIN_READDMA_QUEUED_EXT 0x26 /* 48-Bit */
+#define WIN_READ_NATIVE_MAX_EXT 0x27 /* 48-Bit */
+#define WIN_MULTREAD_EXT 0x29 /* 48-Bit */
+#define WIN_WRITE 0x30 /* 28-Bit */
+#define WIN_WRITE_EXT 0x34 /* 48-Bit */
+#define WIN_WRITEDMA_EXT 0x35 /* 48-Bit */
+#define WIN_WRITEDMA_QUEUED_EXT 0x36 /* 48-Bit */
+#define WIN_SET_MAX_EXT 0x37 /* 48-Bit */
+#define CFA_WRITE_SECT_WO_ERASE 0x38 /* CFA Write Sectors without erase */
+#define WIN_MULTWRITE_EXT 0x39 /* 48-Bit */
+#define WIN_WRITE_VERIFY 0x3C /* 28-Bit */
+#define WIN_VERIFY 0x40 /* 28-Bit - Read Verify Sectors */
+#define WIN_VERIFY_EXT 0x42 /* 48-Bit */
+#define WIN_FORMAT 0x50
+#define WIN_INIT 0x60
+#define WIN_SEEK 0x70
+#define CFA_TRANSLATE_SECTOR 0x87 /* CFA Translate Sector */
+#define WIN_DIAGNOSE 0x90
+#define WIN_SPECIFY 0x91 /* set drive geometry translation */
+#define WIN_DOWNLOAD_MICROCODE 0x92
+#define WIN_STANDBYNOW2 0x94
+#define WIN_SETIDLE2 0x97
+#define WIN_CHECKPOWERMODE2 0x98
+#define WIN_SLEEPNOW2 0x99
+#define WIN_PACKETCMD 0xA0 /* Send a packet command. */
+#define WIN_PIDENTIFY 0xA1 /* identify ATAPI device */
+#define WIN_QUEUED_SERVICE 0xA2
+#define WIN_SMART 0xB0 /* self-monitoring and reporting */
+#define CFA_ERASE_SECTORS 0xC0
+#define WIN_MULTREAD 0xC4 /* read sectors using multiple mode*/
+#define WIN_MULTWRITE 0xC5 /* write sectors using multiple mode */
+#define WIN_SETMULT 0xC6 /* enable/disable multiple mode */
+#define WIN_READDMA_QUEUED 0xC7 /* read sectors using Queued DMA transfers */
+#define WIN_READDMA 0xC8 /* read sectors using DMA transfers */
+#define WIN_WRITEDMA 0xCA /* write sectors using DMA transfers */
+#define WIN_WRITEDMA_QUEUED 0xCC /* write sectors using Queued DMA transfers */
+#define CFA_WRITE_MULTI_WO_ERASE 0xCD /* CFA Write multiple without erase */
+#define WIN_GETMEDIASTATUS 0xDA
+#define WIN_DOORLOCK 0xDE /* lock door on removable drives */
+#define WIN_DOORUNLOCK 0xDF /* unlock door on removable drives */
+#define WIN_STANDBYNOW1 0xE0
+#define WIN_IDLEIMMEDIATE 0xE1 /* force drive to become "ready" */
+#define WIN_STANDBY 0xE2 /* Set device in Standby Mode */
+#define WIN_SETIDLE1 0xE3
+#define WIN_READ_BUFFER 0xE4 /* force read only 1 sector */
+#define WIN_CHECKPOWERMODE1 0xE5
+#define WIN_SLEEPNOW1 0xE6
+#define WIN_FLUSH_CACHE 0xE7
+#define WIN_WRITE_BUFFER 0xE8 /* force write only 1 sector */
+#define WIN_FLUSH_CACHE_EXT 0xEA /* 48-Bit */
+#define WIN_IDENTIFY 0xEC /* ask drive to identify itself */
+#define WIN_MEDIAEJECT 0xED
+#define WIN_IDENTIFY_DMA 0xEE /* same as WIN_IDENTIFY, but DMA */
+#define WIN_SETFEATURES 0xEF /* set special drive features */
+#define EXABYTE_ENABLE_NEST 0xF0
+#define WIN_SECURITY_SET_PASS 0xF1
+#define WIN_SECURITY_UNLOCK 0xF2
+#define WIN_SECURITY_ERASE_PREPARE 0xF3
+#define WIN_SECURITY_ERASE_UNIT 0xF4
+#define WIN_SECURITY_FREEZE_LOCK 0xF5
+#define WIN_SECURITY_DISABLE 0xF6
+#define WIN_READ_NATIVE_MAX 0xF8 /* return the native maximum address */
+#define WIN_SET_MAX 0xF9
+#define DISABLE_SEAGATE 0xFB
+
+/* WIN_SMART sub-commands */
+
+#define SMART_READ_VALUES 0xD0
+#define SMART_READ_THRESHOLDS 0xD1
+#define SMART_AUTOSAVE 0xD2
+#define SMART_SAVE 0xD3
+#define SMART_IMMEDIATE_OFFLINE 0xD4
+#define SMART_READ_LOG_SECTOR 0xD5
+#define SMART_WRITE_LOG_SECTOR 0xD6
+#define SMART_WRITE_THRESHOLDS 0xD7
+#define SMART_ENABLE 0xD8
+#define SMART_DISABLE 0xD9
+#define SMART_STATUS 0xDA
+#define SMART_AUTO_OFFLINE 0xDB
+
+/* Password values for TF4 & TF5 when executing SMART commands */
+
+#define SMART_LCYL_PASS 0x4F
+#define SMART_HCYL_PASS 0xC2
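+
+/*
+ * Illustrative sketch (an assumption modelled on hdparm): enabling SMART
+ * via HDIO_DRIVE_TASK.  The sub-command is assumed to travel in the
+ * feature register and, per the note above, TF4/TF5 (lcyl/hcyl) must
+ * carry the pass values.  'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    unsigned char args[7] = { WIN_SMART,        /* command           */
+                              SMART_ENABLE,     /* feature = sub-cmd */
+                              0, 0,             /* nsector, sector   */
+                              SMART_LCYL_PASS,  /* TF4               */
+                              SMART_HCYL_PASS,  /* TF5               */
+                              0 };              /* device/head       */
+    ioctl(fd, HDIO_DRIVE_TASK, args);
+#endif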
+
+/* WIN_SETFEATURES sub-commands */
+
+#define SETFEATURES_EN_WCACHE 0x02 /* Enable write cache */
+#define SETFEATURES_XFER 0x03 /* Set transfer mode */
+# define XFER_UDMA_7 0x47 /* 0100|0111 */
+# define XFER_UDMA_6 0x46 /* 0100|0110 */
+# define XFER_UDMA_5 0x45 /* 0100|0101 */
+# define XFER_UDMA_4 0x44 /* 0100|0100 */
+# define XFER_UDMA_3 0x43 /* 0100|0011 */
+# define XFER_UDMA_2 0x42 /* 0100|0010 */
+# define XFER_UDMA_1 0x41 /* 0100|0001 */
+# define XFER_UDMA_0 0x40 /* 0100|0000 */
+# define XFER_MW_DMA_2 0x22 /* 0010|0010 */
+# define XFER_MW_DMA_1 0x21 /* 0010|0001 */
+# define XFER_MW_DMA_0 0x20 /* 0010|0000 */
+# define XFER_SW_DMA_2 0x12 /* 0001|0010 */
+# define XFER_SW_DMA_1 0x11 /* 0001|0001 */
+# define XFER_SW_DMA_0 0x10 /* 0001|0000 */
+# define XFER_PIO_4 0x0C /* 0000|1100 */
+# define XFER_PIO_3 0x0B /* 0000|1011 */
+# define XFER_PIO_2 0x0A /* 0000|1010 */
+# define XFER_PIO_1 0x09 /* 0000|1001 */
+# define XFER_PIO_0 0x08 /* 0000|1000 */
+# define XFER_PIO_SLOW 0x00 /* 0000|0000 */
+#define SETFEATURES_DIS_DEFECT 0x04 /* Disable Defect Management */
+#define SETFEATURES_EN_APM 0x05 /* Enable advanced power management */
+#define SETFEATURES_DIS_MSN 0x31 /* Disable Media Status Notification */
+#define SETFEATURES_EN_AAM 0x42 /* Enable Automatic Acoustic Management */
+#define SETFEATURES_DIS_RLA 0x55 /* Disable read look-ahead feature */
+#define SETFEATURES_EN_RI 0x5D /* Enable release interrupt */
+#define SETFEATURES_EN_SI 0x5E /* Enable SERVICE interrupt */
+#define SETFEATURES_DIS_RPOD 0x66 /* Disable reverting to power on defaults */
+#define SETFEATURES_DIS_WCACHE 0x82 /* Disable write cache */
+#define SETFEATURES_EN_DEFECT 0x84 /* Enable Defect Management */
+#define SETFEATURES_DIS_APM 0x85 /* Disable advanced power management */
+#define SETFEATURES_EN_MSN 0x95 /* Enable Media Status Notification */
+#define SETFEATURES_EN_RLA 0xAA /* Enable read look-ahead feature */
+#define SETFEATURES_PREFETCH 0xAB /* Sets drive prefetch value */
+#define SETFEATURES_DIS_AAM 0xC2 /* Disable Automatic Acoustic Management */
+#define SETFEATURES_EN_RPOD 0xCC /* Enable reverting to power on defaults */
+#define SETFEATURES_DIS_RI 0xDD /* Disable release interrupt */
+#define SETFEATURES_DIS_SI 0xDE /* Disable SERVICE interrupt */
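+
+/*
+ * Illustrative sketch (an assumption modelled on "hdparm -X"): selecting
+ * Ultra DMA mode 2 with SETFEATURES_XFER via HDIO_DRIVE_CMD.  The desired
+ * XFER_* value is assumed to travel in args[1] and the feature sub-command
+ * in args[2].  'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    unsigned char args[4] = { WIN_SETFEATURES,  /* command           */
+                              XFER_UDMA_2,      /* new transfer mode */
+                              SETFEATURES_XFER, /* feature = sub-cmd */
+                              0 };
+    ioctl(fd, HDIO_DRIVE_CMD, args);
+#endif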
+
+/* WIN_SECURITY sub-commands */
+
+#define SECURITY_SET_PASSWORD 0xBA
+#define SECURITY_UNLOCK 0xBB
+#define SECURITY_ERASE_PREPARE 0xBC
+#define SECURITY_ERASE_UNIT 0xBD
+#define SECURITY_FREEZE_LOCK 0xBE
+#define SECURITY_DISABLE_PASSWORD 0xBF
+
+struct hd_geometry {
+ unsigned char heads;
+ unsigned char sectors;
+ unsigned short cylinders;
+ unsigned long start;
+};
+
+/* BIG GEOMETRY */
+struct hd_big_geometry {
+ unsigned char heads;
+ unsigned char sectors;
+ unsigned int cylinders;
+ unsigned long start;
+};
+
+/* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x030n/0x031n */
+#define HDIO_GETGEO 0x0301 /* get device geometry */
+#define HDIO_GET_UNMASKINTR 0x0302 /* get current unmask setting */
+#define HDIO_GET_MULTCOUNT 0x0304 /* get current IDE blockmode setting */
+#define HDIO_GET_QDMA 0x0305 /* get use-qdma flag */
+#define HDIO_OBSOLETE_IDENTITY 0x0307 /* OBSOLETE, DO NOT USE: returns 142 bytes */
+#define HDIO_GET_KEEPSETTINGS 0x0308 /* get keep-settings-on-reset flag */
+#define HDIO_GET_32BIT 0x0309 /* get current io_32bit setting */
+#define HDIO_GET_NOWERR 0x030a /* get ignore-write-error flag */
+#define HDIO_GET_DMA 0x030b /* get use-dma flag */
+#define HDIO_GET_NICE 0x030c /* get nice flags */
+#define HDIO_GET_IDENTITY 0x030d /* get IDE identification info */
+#define HDIO_GET_WCACHE 0x030e /* get write cache mode on|off */
+#define HDIO_GET_ACOUSTIC 0x030f /* get acoustic value */
+#define HDIO_GET_ADDRESS 0x0310 /* */
+
+#define HDIO_GET_BUSSTATE 0x031a /* get the bus state of the hwif */
+#define HDIO_TRISTATE_HWIF 0x031b /* execute a channel tristate */
+#define HDIO_DRIVE_RESET 0x031c /* execute a device reset */
+#define HDIO_DRIVE_TASKFILE 0x031d /* execute raw taskfile */
+#define HDIO_DRIVE_TASK 0x031e /* execute task and special drive command */
+#define HDIO_DRIVE_CMD 0x031f /* execute a special drive command */
+
+#define HDIO_DRIVE_CMD_AEB HDIO_DRIVE_TASK
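+
+/*
+ * Illustrative sketch (assumed typical user-space usage): querying the
+ * drive geometry through HDIO_GETGEO into the struct hd_geometry defined
+ * above.  'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    struct hd_geometry geo;
+
+    if (ioctl(fd, HDIO_GETGEO, &geo) == 0)
+        printf("%u heads, %u sectors/track, %u cylinders, start %lu\n",
+               geo.heads, geo.sectors, geo.cylinders, geo.start);
+#endif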
+
+/* hd/ide ctl's that pass (arg) non-ptr values are numbered 0x032n/0x033n */
+#define HDIO_SET_MULTCOUNT 0x0321 /* change IDE blockmode */
+#define HDIO_SET_UNMASKINTR 0x0322 /* permit other irqs during I/O */
+#define HDIO_SET_KEEPSETTINGS 0x0323 /* keep ioctl settings on reset */
+#define HDIO_SET_32BIT 0x0324 /* change io_32bit flags */
+#define HDIO_SET_NOWERR 0x0325 /* change ignore-write-error flag */
+#define HDIO_SET_DMA 0x0326 /* change use-dma flag */
+#define HDIO_SET_PIO_MODE 0x0327 /* reconfig interface to new speed */
+#define HDIO_SCAN_HWIF 0x0328 /* register and (re)scan interface */
+#define HDIO_SET_NICE 0x0329 /* set nice flags */
+#define HDIO_UNREGISTER_HWIF 0x032a /* unregister interface */
+#define HDIO_SET_WCACHE 0x032b /* change write cache enable-disable */
+#define HDIO_SET_ACOUSTIC 0x032c /* change acoustic behavior */
+#define HDIO_SET_BUSSTATE 0x032d /* set the bus state of the hwif */
+#define HDIO_SET_QDMA 0x032e /* change use-qdma flag */
+#define HDIO_SET_ADDRESS 0x032f /* change lba addressing modes */
+
+/* bus states */
+enum {
+ BUSSTATE_OFF = 0,
+ BUSSTATE_ON,
+ BUSSTATE_TRISTATE
+};
+
+/* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x033n */
+#define HDIO_GETGEO_BIG 0x0330 /* */
+#define HDIO_GETGEO_BIG_RAW 0x0331 /* */
+
+#define __NEW_HD_DRIVE_ID
+/* structure returned by HDIO_GET_IDENTITY,
+ * as per ANSI NCITS ATA6 rev.1b spec
+ */
+struct hd_driveid {
+ unsigned short config; /* lots of obsolete bit flags */
+ unsigned short cyls; /* Obsolete, "physical" cyls */
+ unsigned short reserved2; /* reserved (word 2) */
+ unsigned short heads; /* Obsolete, "physical" heads */
+ unsigned short track_bytes; /* unformatted bytes per track */
+ unsigned short sector_bytes; /* unformatted bytes per sector */
+ unsigned short sectors; /* Obsolete, "physical" sectors per track */
+ unsigned short vendor0; /* vendor unique */
+ unsigned short vendor1; /* vendor unique */
+ unsigned short vendor2; /* Retired vendor unique */
+ unsigned char serial_no[20]; /* 0 = not_specified */
+ unsigned short buf_type; /* Retired */
+ unsigned short buf_size; /* Retired, 512 byte increments
+ * 0 = not_specified
+ */
+ unsigned short ecc_bytes; /* for r/w long cmds; 0 = not_specified */
+ unsigned char fw_rev[8]; /* 0 = not_specified */
+ unsigned char model[40]; /* 0 = not_specified */
+ unsigned char max_multsect; /* 0=not_implemented */
+ unsigned char vendor3; /* vendor unique */
+ unsigned short dword_io; /* 0=not_implemented; 1=implemented */
+ unsigned char vendor4; /* vendor unique */
+ unsigned char capability; /* (upper byte of word 49)
+ * 3: IORDYsup
+ * 2: IORDYsw
+ * 1: LBA
+ * 0: DMA
+ */
+ unsigned short reserved50; /* reserved (word 50) */
+ unsigned char vendor5; /* Obsolete, vendor unique */
+ unsigned char tPIO; /* Obsolete, 0=slow, 1=medium, 2=fast */
+ unsigned char vendor6; /* Obsolete, vendor unique */
+ unsigned char tDMA; /* Obsolete, 0=slow, 1=medium, 2=fast */
+ unsigned short field_valid; /* (word 53)
+ * 2: ultra_ok word 88
+ * 1: eide_ok words 64-70
+ * 0: cur_ok words 54-58
+ */
+ unsigned short cur_cyls; /* Obsolete, logical cylinders */
+ unsigned short cur_heads; /* Obsolete, l heads */
+ unsigned short cur_sectors; /* Obsolete, l sectors per track */
+ unsigned short cur_capacity0; /* Obsolete, l total sectors on drive */
+ unsigned short cur_capacity1; /* Obsolete, (2 words, misaligned int) */
+ unsigned char multsect; /* current multiple sector count */
+ unsigned char multsect_valid; /* when (bit0==1) multsect is ok */
+ unsigned int lba_capacity; /* Obsolete, total number of sectors */
+ unsigned short dma_1word; /* Obsolete, single-word dma info */
+ unsigned short dma_mword; /* multiple-word dma info */
+ unsigned short eide_pio_modes; /* bits 0:mode3 1:mode4 */
+ unsigned short eide_dma_min; /* min mword dma cycle time (ns) */
+ unsigned short eide_dma_time; /* recommended mword dma cycle time (ns) */
+ unsigned short eide_pio; /* min cycle time (ns), no IORDY */
+ unsigned short eide_pio_iordy; /* min cycle time (ns), with IORDY */
+ unsigned short words69_70[2]; /* reserved words 69-70
+ * future command overlap and queuing
+ */
+ /* HDIO_GET_IDENTITY currently returns only words 0 through 70 */
+ unsigned short words71_74[4]; /* reserved words 71-74
+ * for IDENTIFY PACKET DEVICE command
+ */
+ unsigned short queue_depth; /* (word 75)
+ * 15:5 reserved
+ * 4:0 Maximum queue depth -1
+ */
+ unsigned short words76_79[4]; /* reserved words 76-79 */
+ unsigned short major_rev_num; /* (word 80) */
+ unsigned short minor_rev_num; /* (word 81) */
+ unsigned short command_set_1; /* (word 82) supported
+ * 15: Obsolete
+ * 14: NOP command
+ * 13: READ_BUFFER
+ * 12: WRITE_BUFFER
+ * 11: Obsolete
+ * 10: Host Protected Area
+ * 9: DEVICE Reset
+ * 8: SERVICE Interrupt
+ * 7: Release Interrupt
+ * 6: look-ahead
+ * 5: write cache
+ * 4: PACKET Command
+ * 3: Power Management Feature Set
+ * 2: Removable Feature Set
+ * 1: Security Feature Set
+ * 0: SMART Feature Set
+ */
+ unsigned short command_set_2; /* (word 83)
+ * 15: Shall be ZERO
+ * 14: Shall be ONE
+ * 13: FLUSH CACHE EXT
+ * 12: FLUSH CACHE
+ * 11: Device Configuration Overlay
+ * 10: 48-bit Address Feature Set
+ * 9: Automatic Acoustic Management
+ * 8: SET MAX security
+ * 7: reserved 1407DT PARTIES
+ * 6: SetF sub-command Power-Up
+ * 5: Power-Up in Standby Feature Set
+ * 4: Removable Media Notification
+ * 3: APM Feature Set
+ * 2: CFA Feature Set
+ * 1: READ/WRITE DMA QUEUED
+ * 0: Download MicroCode
+ */
+ unsigned short cfsse; /* (word 84)
+ * cmd set-feature supported extensions
+ * 15: Shall be ZERO
+ * 14: Shall be ONE
+ * 13:3 reserved
+ * 2: Media Serial Number Valid
+ * 1: SMART self-test supported
+ * 0: SMART error logging
+ */
+ unsigned short cfs_enable_1; /* (word 85)
+ * command set-feature enabled
+ * 15: Obsolete
+ * 14: NOP command
+ * 13: READ_BUFFER
+ * 12: WRITE_BUFFER
+ * 11: Obsolete
+ * 10: Host Protected Area
+ * 9: DEVICE Reset
+ * 8: SERVICE Interrupt
+ * 7: Release Interrupt
+ * 6: look-ahead
+ * 5: write cache
+ * 4: PACKET Command
+ * 3: Power Management Feature Set
+ * 2: Removable Feature Set
+ * 1: Security Feature Set
+ * 0: SMART Feature Set
+ */
+ unsigned short cfs_enable_2; /* (word 86)
+ * command set-feature enabled
+ * 15: Shall be ZERO
+ * 14: Shall be ONE
+ * 13: FLUSH CACHE EXT
+ * 12: FLUSH CACHE
+ * 11: Device Configuration Overlay
+ * 10: 48-bit Address Feature Set
+ * 9: Automatic Acoustic Management
+ * 8: SET MAX security
+ * 7: reserved 1407DT PARTIES
+ * 6: SetF sub-command Power-Up
+ * 5: Power-Up in Standby Feature Set
+ * 4: Removable Media Notification
+ * 3: APM Feature Set
+ * 2: CFA Feature Set
+ * 1: READ/WRITE DMA QUEUED
+ * 0: Download MicroCode
+ */
+ unsigned short csf_default; /* (word 87)
+ * command set-feature default
+ * 15: Shall be ZERO
+ * 14: Shall be ONE
+ * 13:3 reserved
+ * 2: Media Serial Number Valid
+ * 1: SMART self-test supported
+ * 0: SMART error logging
+ */
+ unsigned short dma_ultra; /* (word 88) */
+ unsigned short word89; /* reserved (word 89) */
+ unsigned short word90; /* reserved (word 90) */
+ unsigned short CurAPMvalues; /* current APM values */
+ unsigned short word92; /* reserved (word 92) */
+ unsigned short hw_config; /* hardware config (word 93)
+ * 15:0 bit meanings not filled in
+ */
+ unsigned short acoustic; /* (word 94)
+ * 15:8 Vendor's recommended value
+ * 7:0 current value
+ */
+ unsigned short words95_99[5]; /* reserved words 95-99 */
+#if 0
+ unsigned short words100_103[4] ;/* reserved words 100-103 */
+#else
+ unsigned long long lba_capacity_2;/* 48-bit total number of sectors */
+#endif
+ unsigned short words104_125[22];/* reserved words 104-125 */
+ unsigned short last_lun; /* (word 126) */
+ unsigned short word127; /* (word 127) Feature Set
+ * Removable Media Notification
+ * 15:2 reserved
+ * 1:0 00 = not supported
+ * 01 = supported
+ * 10 = reserved
+ * 11 = reserved
+ */
+ unsigned short dlf; /* (word 128)
+ * device lock function
+ * 15:9 reserved
+ * 8 security level 1:max 0:high
+ * 7:6 reserved
+ * 5 enhanced erase
+ * 4 expire
+ * 3 frozen
+ * 2 locked
+ * 1 en/disabled
+ * 0 capability
+ */
+ unsigned short csfo; /* (word 129)
+ * current set features options
+ * 15:4 reserved
+ * 3: auto reassign
+ * 2: reverting
+ * 1: read-look-ahead
+ * 0: write cache
+ */
+ unsigned short words130_155[26];/* reserved vendor words 130-155 */
+ unsigned short word156; /* reserved vendor word 156 */
+ unsigned short words157_159[3];/* reserved vendor words 157-159 */
+ unsigned short cfa_power; /* (word 160) CFA Power Mode
+ * 15 word 160 supported
+ * 14 reserved
+ * 13:0 not documented in this header
+ */
+ unsigned short words161_175[14];/* Reserved for CFA */
+ unsigned short words176_205[31];/* Current Media Serial Number */
+ unsigned short words206_254[48];/* reserved words 206-254 */
+ unsigned short integrity_word; /* (word 255)
+ * 15:8 Checksum
+ * 7:0 Signature
+ */
+};
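+
+/*
+ * Illustrative sketch (assumed user-space usage): fetching the identify
+ * block with HDIO_GET_IDENTITY and printing fields within the word 0-70
+ * range noted above.  'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    struct hd_driveid id;
+
+    if (ioctl(fd, HDIO_GET_IDENTITY, &id) == 0)
+        printf("model: %.40s serial: %.20s\n", id.model, id.serial_no);
+#endif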
+
+/*
+ * IDE "nice" flags. These are used on a per drive basis to determine
+ * when to be nice and give more bandwidth to the other devices which
+ * share the same IDE bus.
+ */
+#define IDE_NICE_DSC_OVERLAP (0) /* per the DSC overlap protocol */
+#define IDE_NICE_ATAPI_OVERLAP (1) /* not supported yet */
+#define IDE_NICE_0 (2) /* when sure that it won't affect us */
+#define IDE_NICE_1 (3) /* when probably won't affect us much */
+#define IDE_NICE_2 (4) /* when we know it's on our expense */
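+
+/*
+ * Illustrative sketch (an assumption about how these values are used):
+ * they are taken here to be bit positions in the word exchanged via
+ * HDIO_GET_NICE and HDIO_SET_NICE.  'fd' is a hypothetical open file
+ * descriptor.
+ */
+#if 0
+    long nice = 0;
+
+    ioctl(fd, HDIO_GET_NICE, &nice);    /* ptr-style ioctl (0x030c) */
+    nice |= 1 << IDE_NICE_1;            /* yield when it probably won't hurt */
+    ioctl(fd, HDIO_SET_NICE, nice);     /* value-style ioctl (0x0329) */
+#endif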
+
+#ifdef __KERNEL__
+/*
+ * These routines are used for kernel command line parameters from main.c:
+ */
+#include <linux/config.h>
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+int ide_register(int io_port, int ctl_port, int irq);
+void ide_unregister(unsigned int);
+#endif /* CONFIG_BLK_DEV_IDE || CONFIG_BLK_DEV_IDE_MODULE */
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_HDREG_H */
diff --git a/xen/include/xeno/hdsmart.h b/xen/include/xeno/hdsmart.h
new file mode 100644
index 0000000000..7974a47fe5
--- /dev/null
+++ b/xen/include/xeno/hdsmart.h
@@ -0,0 +1,124 @@
+/*
+ * linux/include/linux/hdsmart.h
+ *
+ * Copyright (C) 1999-2000 Michael Cornwell <cornwell@acm.org>
+ * Copyright (C) 2000 Andre Hedrick <andre@linux-ide.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_HDSMART_H
+#define _LINUX_HDSMART_H
+
+#define OFFLINE_FULL_SCAN 0
+#define SHORT_SELF_TEST 1
+#define EXTEND_SELF_TEST 2
+#define SHORT_CAPTIVE_SELF_TEST 129
+#define EXTEND_CAPTIVE_SELF_TEST 130
+
+/* smart_attribute is the vendor-specific attribute format defined in the SFF-8035 spec */
+typedef struct ata_smart_attribute_s {
+ unsigned char id;
+ unsigned short status_flag;
+ unsigned char normalized;
+ unsigned char worse_normal;
+ unsigned char raw[6];
+ unsigned char reserv;
+} __attribute__ ((packed)) ata_smart_attribute_t;
+
+/* smart_values is the format returned by the SMART Read Attribute Values command */
+typedef struct ata_smart_values_s {
+ unsigned short revnumber;
+ ata_smart_attribute_t vendor_attributes [30];
+ unsigned char offline_data_collection_status;
+ unsigned char self_test_exec_status;
+ unsigned short total_time_to_complete_off_line;
+ unsigned char vendor_specific_366;
+ unsigned char offline_data_collection_capability;
+ unsigned short smart_capability;
+ unsigned char errorlog_capability;
+ unsigned char vendor_specific_371;
+ unsigned char short_test_completion_time;
+ unsigned char extend_test_completion_time;
+ unsigned char reserved_374_385 [12];
+ unsigned char vendor_specific_386_509 [125];
+ unsigned char chksum;
+} __attribute__ ((packed)) ata_smart_values_t;
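+
+/*
+ * Illustrative sketch (an assumption from the SFF-8035 convention these
+ * structures follow, not spelled out in this header): the 512-byte SMART
+ * data sector is validated by summing all of its bytes, chksum included;
+ * a valid sector sums to zero modulo 256.
+ */
+#if 0
+static int ata_smart_csum_ok(const ata_smart_values_t *v)
+{
+    const unsigned char *p = (const unsigned char *)v;
+    unsigned char sum = 0;
+    int i;
+
+    for (i = 0; i < 512; i++)
+        sum += p[i];
+    return sum == 0;
+}
+#endif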
+
+/* Smart Threshold data structures */
+/* Vendor attribute of SMART Threshold */
+typedef struct ata_smart_threshold_entry_s {
+ unsigned char id;
+ unsigned char normalized_threshold;
+ unsigned char reserved[10];
+} __attribute__ ((packed)) ata_smart_threshold_entry_t;
+
+/* Format of the Read SMART Threshold command */
+typedef struct ata_smart_thresholds_s {
+ unsigned short revnumber;
+ ata_smart_threshold_entry_t thres_entries[30];
+ unsigned char reserved[149];
+ unsigned char chksum;
+} __attribute__ ((packed)) ata_smart_thresholds_t;
+
+typedef struct ata_smart_errorlog_command_struct_s {
+ unsigned char devicecontrolreg;
+ unsigned char featuresreg;
+ unsigned char sector_count;
+ unsigned char sector_number;
+ unsigned char cylinder_low;
+ unsigned char cylinder_high;
+ unsigned char drive_head;
+ unsigned char commandreg;
+ unsigned int timestamp;
+} __attribute__ ((packed)) ata_smart_errorlog_command_struct_t;
+
+typedef struct ata_smart_errorlog_error_struct_s {
+ unsigned char error_condition;
+ unsigned char extended_error[14];
+ unsigned char state;
+ unsigned short timestamp;
+} __attribute__ ((packed)) ata_smart_errorlog_error_struct_t;
+
+typedef struct ata_smart_errorlog_struct_s {
+ ata_smart_errorlog_command_struct_t commands[6];
+ ata_smart_errorlog_error_struct_t error_struct;
+} __attribute__ ((packed)) ata_smart_errorlog_struct_t;
+
+typedef struct ata_smart_errorlog_s {
+ unsigned char revnumber;
+ unsigned char error_log_pointer;
+ ata_smart_errorlog_struct_t errorlog_struct[5];
+ unsigned short ata_error_count;
+ unsigned short non_fatal_count;
+ unsigned short drive_timeout_count;
+ unsigned char reserved[53];
+ unsigned char chksum;
+} __attribute__ ((packed)) ata_smart_errorlog_t;
+
+typedef struct ata_smart_selftestlog_struct_s {
+ unsigned char selftestnumber;
+ unsigned char selfteststatus;
+ unsigned short timestamp;
+ unsigned char selftestfailurecheckpoint;
+ unsigned int lbafirstfailure;
+ unsigned char vendorspecific[15];
+} __attribute__ ((packed)) ata_smart_selftestlog_struct_t;
+
+typedef struct ata_smart_selftestlog_s {
+ unsigned short revnumber;
+ ata_smart_selftestlog_struct_t selftest_struct[21];
+ unsigned char vendorspecific[2];
+ unsigned char mostrecenttest;
+ unsigned char resevered[2];
+ unsigned char chksum;
+} __attribute__ ((packed)) ata_smart_selftestlog_t;
+
+#endif /* _LINUX_HDSMART_H */
diff --git a/xen/include/xeno/ide.h b/xen/include/xeno/ide.h
new file mode 100644
index 0000000000..dacfd89842
--- /dev/null
+++ b/xen/include/xeno/ide.h
@@ -0,0 +1,1105 @@
+#ifndef _IDE_H
+#define _IDE_H
+/*
+ * linux/include/linux/ide.h
+ *
+ * Copyright (C) 1994-1998 Linus Torvalds & authors
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/ioport.h>
+#include <xeno/hdreg.h>
+#include <xeno/hdsmart.h>
+#include <xeno/major.h>
+#include <xeno/blkdev.h>
+/*#include <xeno/proc_fs.h>*/
+/*#include <xeno/devfs_fs_kernel.h>*/
+#include <asm/hdreg.h>
+
+/*
+ * This is the multiple IDE interface driver, as evolved from hd.c.
+ * It supports up to four IDE interfaces, on one or more IRQs (usually 14 & 15).
+ * There can be up to two drives per interface, as per the ATA-2 spec.
+ *
+ * Primary i/f: ide0: major=3; (hda) minor=0; (hdb) minor=64
+ * Secondary i/f: ide1: major=22; (hdc or hd1a) minor=0; (hdd or hd1b) minor=64
+ * Tertiary i/f: ide2: major=33; (hde) minor=0; (hdf) minor=64
+ * Quaternary i/f: ide3: major=34; (hdg) minor=0; (hdh) minor=64
+ */
+
+/******************************************************************************
+ * IDE driver configuration options (play with these as desired):
+ *
+ * REALLY_SLOW_IO can be defined in ide.c and ide-cd.c, if necessary
+ */
+#undef REALLY_FAST_IO /* define if ide ports are perfect */
+#define INITIAL_MULT_COUNT 0 /* off=0; on=2,4,8,16,32, etc.. */
+
+#ifndef SUPPORT_SLOW_DATA_PORTS /* 1 to support slow data ports */
+#define SUPPORT_SLOW_DATA_PORTS 1 /* 0 to reduce kernel size */
+#endif
+#ifndef SUPPORT_VLB_SYNC /* 1 to support weird 32-bit chips */
+#define SUPPORT_VLB_SYNC 1 /* 0 to reduce kernel size */
+#endif
+#ifndef DISK_RECOVERY_TIME /* off=0; on=access_delay_time */
+#define DISK_RECOVERY_TIME 0 /* for hardware that needs it */
+#endif
+#ifndef OK_TO_RESET_CONTROLLER /* 1 needed for good error recovery */
+#define OK_TO_RESET_CONTROLLER 1 /* 0 for use with AH2372A/B interface */
+#endif
+#ifndef FANCY_STATUS_DUMPS /* 1 for human-readable drive errors */
+#define FANCY_STATUS_DUMPS 1 /* 0 to reduce kernel size */
+#endif
+
+#ifdef CONFIG_BLK_DEV_CMD640
+#if 0 /* change to 1 when debugging cmd640 problems */
+void cmd640_dump_regs (void);
+#define CMD640_DUMP_REGS cmd640_dump_regs() /* for debugging cmd640 chipset */
+#endif
+#endif /* CONFIG_BLK_DEV_CMD640 */
+
+#ifndef DISABLE_IRQ_NOSYNC
+#define DISABLE_IRQ_NOSYNC 0
+#endif
+
+/*
+ * IDE_DRIVE_CMD is used to implement many features of the hdparm utility
+ */
+#define IDE_DRIVE_CMD 99 /* (magic) undef to reduce kernel size*/
+
+#define IDE_DRIVE_TASK 98
+
+/*
+ * IDE_DRIVE_TASKFILE is used to implement many features needed for raw tasks
+ */
+#define IDE_DRIVE_TASKFILE 97
+
+/*
+ * "No user-serviceable parts" beyond this point :)
+ *****************************************************************************/
+
+typedef unsigned char byte; /* used everywhere */
+
+/*
+ * Probably not wise to fiddle with these
+ */
+#define ERROR_MAX 8 /* Max read/write errors per sector */
+#define ERROR_RESET 3 /* Reset controller every 4th retry */
+#define ERROR_RECAL 1 /* Recalibrate every 2nd retry */
+
+/*
+ * state flags
+ */
+#define DMA_PIO_RETRY 1 /* retrying in PIO */
+
+/*
+ * Ensure that various configuration flags have compatible settings
+ */
+#ifdef REALLY_SLOW_IO
+#undef REALLY_FAST_IO
+#endif
+
+#define HWIF(drive) ((ide_hwif_t *)((drive)->hwif))
+#define HWGROUP(drive) ((ide_hwgroup_t *)(HWIF(drive)->hwgroup))
+
+/*
+ * Definitions for accessing IDE controller registers
+ */
+#define IDE_NR_PORTS (10)
+
+#define IDE_DATA_OFFSET (0)
+#define IDE_ERROR_OFFSET (1)
+#define IDE_NSECTOR_OFFSET (2)
+#define IDE_SECTOR_OFFSET (3)
+#define IDE_LCYL_OFFSET (4)
+#define IDE_HCYL_OFFSET (5)
+#define IDE_SELECT_OFFSET (6)
+#define IDE_STATUS_OFFSET (7)
+#define IDE_CONTROL_OFFSET (8)
+#define IDE_IRQ_OFFSET (9)
+
+#define IDE_FEATURE_OFFSET IDE_ERROR_OFFSET
+#define IDE_COMMAND_OFFSET IDE_STATUS_OFFSET
+
+#define IDE_DATA_OFFSET_HOB (0)
+#define IDE_ERROR_OFFSET_HOB (1)
+#define IDE_NSECTOR_OFFSET_HOB (2)
+#define IDE_SECTOR_OFFSET_HOB (3)
+#define IDE_LCYL_OFFSET_HOB (4)
+#define IDE_HCYL_OFFSET_HOB (5)
+#define IDE_SELECT_OFFSET_HOB (6)
+#define IDE_CONTROL_OFFSET_HOB (7)
+
+#define IDE_FEATURE_OFFSET_HOB IDE_ERROR_OFFSET_HOB
+
+#define IDE_DATA_REG (HWIF(drive)->io_ports[IDE_DATA_OFFSET])
+#define IDE_ERROR_REG (HWIF(drive)->io_ports[IDE_ERROR_OFFSET])
+#define IDE_NSECTOR_REG (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET])
+#define IDE_SECTOR_REG (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET])
+#define IDE_LCYL_REG (HWIF(drive)->io_ports[IDE_LCYL_OFFSET])
+#define IDE_HCYL_REG (HWIF(drive)->io_ports[IDE_HCYL_OFFSET])
+#define IDE_SELECT_REG (HWIF(drive)->io_ports[IDE_SELECT_OFFSET])
+#define IDE_STATUS_REG (HWIF(drive)->io_ports[IDE_STATUS_OFFSET])
+#define IDE_CONTROL_REG (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET])
+#define IDE_IRQ_REG (HWIF(drive)->io_ports[IDE_IRQ_OFFSET])
+
+#define IDE_DATA_REG_HOB (HWIF(drive)->io_ports[IDE_DATA_OFFSET])
+#define IDE_ERROR_REG_HOB (HWIF(drive)->io_ports[IDE_ERROR_OFFSET])
+#define IDE_NSECTOR_REG_HOB (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET])
+#define IDE_SECTOR_REG_HOB (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET])
+#define IDE_LCYL_REG_HOB (HWIF(drive)->io_ports[IDE_LCYL_OFFSET])
+#define IDE_HCYL_REG_HOB (HWIF(drive)->io_ports[IDE_HCYL_OFFSET])
+#define IDE_SELECT_REG_HOB (HWIF(drive)->io_ports[IDE_SELECT_OFFSET])
+#define IDE_STATUS_REG_HOB (HWIF(drive)->io_ports[IDE_STATUS_OFFSET])
+#define IDE_CONTROL_REG_HOB (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET])
+
+#define IDE_FEATURE_REG IDE_ERROR_REG
+#define IDE_COMMAND_REG IDE_STATUS_REG
+#define IDE_ALTSTATUS_REG IDE_CONTROL_REG
+#define IDE_IREASON_REG IDE_NSECTOR_REG
+#define IDE_BCOUNTL_REG IDE_LCYL_REG
+#define IDE_BCOUNTH_REG IDE_HCYL_REG
+
+#define GET_ERR() IN_BYTE(IDE_ERROR_REG)
+#define GET_STAT() IN_BYTE(IDE_STATUS_REG)
+#define GET_ALTSTAT() IN_BYTE(IDE_CONTROL_REG)
+#define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good))
+#define BAD_R_STAT (BUSY_STAT | ERR_STAT)
+#define BAD_W_STAT (BAD_R_STAT | WRERR_STAT)
+#define BAD_STAT (BAD_R_STAT | DRQ_STAT)
+#define DRIVE_READY (READY_STAT | SEEK_STAT)
+#define DATA_READY (DRQ_STAT)
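+
+/*
+ * Illustrative sketch: the usual status test built from the macros above.
+ * GET_STAT() implicitly uses a 'drive' variable in scope via HWIF().
+ */
+#if 0
+    byte stat = GET_STAT();
+
+    if (OK_STAT(stat, DRIVE_READY, BAD_W_STAT))
+        /* all "good" bits set and no "bad" bits set */ ;
+#endif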
+
+/*
+ * Some more useful definitions
+ */
+#define IDE_MAJOR_NAME "hd" /* the same for all i/f; see also genhd.c */
+#define MAJOR_NAME IDE_MAJOR_NAME
+#define PARTN_BITS 6 /* number of minor dev bits for partitions */
+#define PARTN_MASK ((1<<PARTN_BITS)-1) /* a useful bit mask */
+#define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */
+#define CASCADE_DRIVES 8 /* per interface; 8|2 assumed by lots of code */
+#define SECTOR_SIZE 512
+#define SECTOR_WORDS (SECTOR_SIZE / 4) /* number of 32bit words per sector */
+#define IDE_LARGE_SEEK(b1,b2,t) (((b1) > (b2) + (t)) || ((b2) > (b1) + (t)))
+#define IDE_MIN(a,b) ((a)<(b) ? (a):(b))
+#define IDE_MAX(a,b) ((a)>(b) ? (a):(b))
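+
+/*
+ * Illustrative sketch (not in the original header): PARTN_BITS/PARTN_MASK
+ * split a minor number into drive unit and partition, e.g. minor 67
+ * ("hdb3" on ide0) -> unit 1, partition 3.  'minor' is an arbitrary
+ * value in scope.
+ */
+#if 0
+    unsigned int unit      = minor >> PARTN_BITS;   /* drive on the i/f */
+    unsigned int partition = minor &  PARTN_MASK;   /* 0 = whole disk   */
+#endif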
+
+#ifndef SPLIT_WORD
+# define SPLIT_WORD(W,HB,LB) ((HB)=(W>>8), (LB)=(W-((W>>8)<<8)))
+#endif
+#ifndef MAKE_WORD
+# define MAKE_WORD(W,HB,LB) ((W)=((HB<<8)+LB))
+#endif
+
+
+/*
+ * Timeouts for various operations:
+ */
+#define WAIT_DRQ (5*HZ/100) /* 50msec - spec allows up to 20ms */
+#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
+#define WAIT_READY (5*HZ) /* 5sec - some laptops are very slow */
+#else
+#define WAIT_READY (3*HZ/100) /* 30msec - should be instantaneous */
+#endif /* CONFIG_APM || CONFIG_APM_MODULE */
+#define WAIT_PIDENTIFY (10*HZ) /* 10sec - should be less than 3ms (?), if all ATAPI CD-ROM drives are closed at boot */
+#define WAIT_WORSTCASE (30*HZ) /* 30sec - worst case when spinning up */
+#define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */
+#define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */
+
+#define SELECT_DRIVE(hwif,drive) \
+{ \
+ if (hwif->selectproc) \
+ hwif->selectproc(drive); \
+ OUT_BYTE((drive)->select.all, hwif->io_ports[IDE_SELECT_OFFSET]); \
+}
+
+#define SELECT_INTERRUPT(hwif,drive) \
+{ \
+ if (hwif->intrproc) \
+ hwif->intrproc(drive); \
+ else \
+ OUT_BYTE((drive)->ctl|2, hwif->io_ports[IDE_CONTROL_OFFSET]); \
+}
+
+#define SELECT_MASK(hwif,drive,mask) \
+{ \
+ if (hwif->maskproc) \
+ hwif->maskproc(drive,mask); \
+}
+
+#define SELECT_READ_WRITE(hwif,drive,func) \
+{ \
+ if (hwif->rwproc) \
+ hwif->rwproc(drive,func); \
+}
+
+#define QUIRK_LIST(hwif,drive) \
+{ \
+ if (hwif->quirkproc) \
+ (drive)->quirk_list = hwif->quirkproc(drive); \
+}
+
+#define HOST(hwif,chipset) \
+{ \
+ return ((hwif)->chipset == chipset) ? 1 : 0; \
+}
+
+#define IDE_DEBUG(lineno) \
+ printk("%s,%s,line=%d\n", __FILE__, __FUNCTION__, (lineno))
+
+/*
+ * Check for an interrupt and acknowledge the interrupt status
+ */
+struct hwif_s;
+typedef int (ide_ack_intr_t)(struct hwif_s *);
+
+#ifndef NO_DMA
+#define NO_DMA 255
+#endif
+
+/*
+ * hwif_chipset_t is used to keep track of the specific hardware
+ * chipset used by each IDE interface, if known.
+ */
+typedef enum { ide_unknown, ide_generic, ide_pci,
+ ide_cmd640, ide_dtc2278, ide_ali14xx,
+ ide_qd65xx, ide_umc8672, ide_ht6560b,
+ ide_pdc4030, ide_rz1000, ide_trm290,
+ ide_cmd646, ide_cy82c693, ide_4drives,
+ ide_pmac, ide_etrax100
+} hwif_chipset_t;
+
+/*
+ * Structure to hold all information about the location of this port
+ */
+typedef struct hw_regs_s {
+ ide_ioreg_t io_ports[IDE_NR_PORTS]; /* task file registers */
+ int irq; /* our irq number */
+ int dma; /* our dma entry */
+ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */
+ void *priv; /* interface specific data */
+ hwif_chipset_t chipset;
+} hw_regs_t;
+
+/*
+ * Register new hardware with ide
+ */
+int ide_register_hw(hw_regs_t *hw, struct hwif_s **hwifp);
+
+/*
+ * Set up hw_regs_t structure before calling ide_register_hw (optional)
+ */
+void ide_setup_ports( hw_regs_t *hw,
+ ide_ioreg_t base,
+ int *offsets,
+ ide_ioreg_t ctrl,
+ ide_ioreg_t intr,
+ ide_ack_intr_t *ack_intr,
+ int irq);
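+
+/*
+ * Illustrative sketch (an assumption about intended use of the call
+ * above): describing a legacy interface by hand and registering it.
+ * The port numbers mirror the primary-channel defaults from
+ * <xeno/hdreg.h>.
+ */
+#if 0
+    hw_regs_t hw;
+    struct hwif_s *hwif;
+    int i;
+
+    memset(&hw, 0, sizeof(hw));
+    for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++)
+        hw.io_ports[i] = 0x1f0 + i;             /* task file 0x1f0-0x1f7 */
+    hw.io_ports[IDE_CONTROL_OFFSET] = 0x3f6;    /* control/altstatus     */
+    hw.irq = 14;
+    if (ide_register_hw(&hw, &hwif) < 0)
+        /* registration failed */ ;
+#endif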
+
+#include <asm/ide.h>
+
+/*
+ * If the arch-dependant ide.h did not declare/define any OUT_BYTE
+ * or IN_BYTE functions, we make some defaults here.
+ */
+
+#ifndef HAVE_ARCH_OUT_BYTE
+#ifdef REALLY_FAST_IO
+#define OUT_BYTE(b,p) outb((b),(p))
+#define OUT_WORD(w,p) outw((w),(p))
+#else
+#define OUT_BYTE(b,p) outb_p((b),(p))
+#define OUT_WORD(w,p) outw_p((w),(p))
+#endif
+#endif
+
+#ifndef HAVE_ARCH_IN_BYTE
+#ifdef REALLY_FAST_IO
+#define IN_BYTE(p) (byte)inb(p)
+#define IN_WORD(p) (short)inw(p)
+#else
+#define IN_BYTE(p) (byte)inb_p(p)
+#define IN_WORD(p) (short)inw_p(p)
+#endif
+#endif
+
+/*
+ * Now for the data we need to maintain per-drive: ide_drive_t
+ */
+
+#define ide_scsi 0x21
+#define ide_disk 0x20
+#define ide_optical 0x7
+#define ide_cdrom 0x5
+#define ide_tape 0x1
+#define ide_floppy 0x0
+
+typedef union {
+ unsigned all : 8; /* all of the bits together */
+ struct {
+ unsigned set_geometry : 1; /* respecify drive geometry */
+ unsigned recalibrate : 1; /* seek to cyl 0 */
+ unsigned set_multmode : 1; /* set multmode count */
+ unsigned set_tune : 1; /* tune interface for drive */
+ unsigned reserved : 4; /* unused */
+ } b;
+} special_t;
+
+typedef struct ide_drive_s {
+ request_queue_t queue; /* request queue */
+ struct ide_drive_s *next; /* circular list of hwgroup drives */
+ unsigned long sleep; /* sleep until this time */
+ unsigned long service_start; /* time we started last request */
+ unsigned long service_time; /* service time of last request */
+ unsigned long timeout; /* max time to wait for irq */
+ special_t special; /* special action flags */
+ byte keep_settings; /* restore settings after drive reset */
+ byte using_dma; /* disk is using dma for read/write */
+ byte retry_pio; /* retrying dma capable host in pio */
+ byte state; /* retry state */
+ byte waiting_for_dma; /* dma currently in progress */
+ byte unmask; /* flag: okay to unmask other irqs */
+ byte slow; /* flag: slow data port */
+ byte bswap; /* flag: byte swap data */
+ byte dsc_overlap; /* flag: DSC overlap */
+ byte nice1; /* flag: give potential excess bandwidth */
+ unsigned present : 1; /* drive is physically present */
+ unsigned noprobe : 1; /* from: hdx=noprobe */
+ unsigned busy : 1; /* currently doing revalidate_disk() */
+ unsigned removable : 1; /* 1 if need to do check_media_change */
+ unsigned forced_geom : 1; /* 1 if hdx=c,h,s was given at boot */
+ unsigned no_unmask : 1; /* disallow setting unmask bit */
+ unsigned no_io_32bit : 1; /* disallow enabling 32bit I/O */
+ unsigned nobios : 1; /* flag: do not probe bios for drive */
+ unsigned revalidate : 1; /* request revalidation */
+ unsigned atapi_overlap : 1; /* flag: ATAPI overlap (not supported) */
+ unsigned nice0 : 1; /* flag: give obvious excess bandwidth */
+ unsigned nice2 : 1; /* flag: give a share in our own bandwidth */
+ unsigned doorlocking : 1; /* flag: for removable only: door lock/unlock works */
+ unsigned autotune : 2; /* 1=autotune, 2=noautotune, 0=default */
+ unsigned remap_0_to_1 : 2; /* 0=remap if ezdrive, 1=remap, 2=noremap */
+ unsigned ata_flash : 1; /* 1=present, 0=default */
+ unsigned addressing; /* : 2; 0=28-bit, 1=48-bit, 2=64-bit */
+ byte scsi; /* 0=default, 1=skip current ide-subdriver for ide-scsi emulation */
+ byte media; /* disk, cdrom, tape, floppy, ... */
+ select_t select; /* basic drive/head select reg value */
+ byte ctl; /* "normal" value for IDE_CONTROL_REG */
+ byte ready_stat; /* min status value for drive ready */
+ byte mult_count; /* current multiple sector setting */
+ byte mult_req; /* requested multiple sector setting */
+ byte tune_req; /* requested drive tuning setting */
+ byte io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */
+ byte bad_wstat; /* used for ignoring WRERR_STAT */
+ byte nowerr; /* used for ignoring WRERR_STAT */
+ byte sect0; /* offset of first sector for DM6:DDO */
+ unsigned int usage; /* current "open()" count for drive */
+ byte head; /* "real" number of heads */
+ byte sect; /* "real" sectors per track */
+ byte bios_head; /* BIOS/fdisk/LILO number of heads */
+ byte bios_sect; /* BIOS/fdisk/LILO sectors per track */
+ unsigned int bios_cyl; /* BIOS/fdisk/LILO number of cyls */
+ unsigned int cyl; /* "real" number of cyls */
+ unsigned long capacity; /* total number of sectors */
+ unsigned long long capacity48; /* total number of sectors */
+ unsigned int drive_data; /* for use by tuneproc/selectproc as needed */
+ void *hwif; /* actually (ide_hwif_t *) */
+ /*wait_queue_head_t wqueue;*/ /* used to wait for drive in open() */
+ struct hd_driveid *id; /* drive model identification info */
+ struct hd_struct *part; /* drive partition table */
+ char name[4]; /* drive name, such as "hda" */
+ void *driver; /* (ide_driver_t *) */
+ void *driver_data; /* extra driver data */
+ /*devfs_handle_t de; */ /* directory for device */
+ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */
+ void *settings; /* /proc/ide/ drive settings */
+ char driver_req[10]; /* requests specific driver */
+ int last_lun; /* last logical unit */
+ int forced_lun; /* if hdxlun was given at boot */
+ int lun; /* logical unit */
+ int crc_count; /* crc counter to reduce drive speed */
+ byte quirk_list; /* drive is considered quirky if set for a specific host */
+ byte suspend_reset; /* drive suspend mode flag, soft-reset recovers */
+ byte init_speed; /* transfer rate set at boot */
+ byte current_speed; /* current transfer rate set */
+ byte dn; /* now in widespread use */
+ byte wcache; /* status of write cache */
+ byte acoustic; /* acoustic management */
+ unsigned int failures; /* current failure count */
+ unsigned int max_failures; /* maximum allowed failure count */
+} ide_drive_t;
+
+/*
+ * An ide_dmaproc_t() initiates/aborts DMA read/write operations on a drive.
+ *
+ * The caller is assumed to have selected the drive and programmed the drive's
+ * sector address using CHS or LBA. All that remains is to prepare for DMA
+ * and then issue the actual read/write DMA/PIO command to the drive.
+ *
+ * Returns 0 if all went well.
+ * Returns 1 if DMA read/write could not be started, in which case the caller
+ * should either try again later, or revert to PIO for the current request.
+ */
+typedef enum { ide_dma_read, ide_dma_write, ide_dma_begin,
+ ide_dma_end, ide_dma_check, ide_dma_on,
+ ide_dma_off, ide_dma_off_quietly, ide_dma_test_irq,
+ ide_dma_bad_drive, ide_dma_good_drive,
+ ide_dma_verbose, ide_dma_retune,
+ ide_dma_lostirq, ide_dma_timeout
+} ide_dma_action_t;
+
+typedef int (ide_dmaproc_t)(ide_dma_action_t, ide_drive_t *);
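+
+/*
+ * Illustrative sketch (a skeleton, not a real chipset implementation):
+ * the shape of a dmaproc honouring the return convention above.
+ */
+#if 0
+static int example_dmaproc (ide_dma_action_t func, ide_drive_t *drive)
+{
+    switch (func) {
+    case ide_dma_check:
+        return 0;   /* report that DMA is usable for this drive */
+    case ide_dma_read:
+    case ide_dma_write:
+        return 1;   /* could not start DMA; caller reverts to PIO */
+    default:
+        return 0;
+    }
+}
+#endif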
+
+/*
+ * An ide_ideproc_t() performs CPU-polled transfers to/from a drive.
+ * Arguments are: the drive, the buffer pointer, and the length (in bytes or
+ * words depending on if it's an IDE or ATAPI call).
+ *
+ * If it is not defined for a controller, standard code from ide.c is used.
+ *
+ * Controllers which are not memory-mapped in the standard way need to
+ * override that mechanism using this function to work.
+ *
+ */
+typedef enum { ideproc_ide_input_data, ideproc_ide_output_data,
+ ideproc_atapi_input_bytes, ideproc_atapi_output_bytes
+} ide_ide_action_t;
+
+typedef void (ide_ideproc_t)(ide_ide_action_t, ide_drive_t *, void *, unsigned int);
+
+/*
+ * An ide_tuneproc_t() is used to set the speed of an IDE interface
+ * to a particular PIO mode. The "byte" parameter is used
+ * to select the PIO mode by number (0,1,2,3,4,5), and a value of 255
+ * indicates that the interface driver should "auto-tune" the PIO mode
+ * according to the drive capabilities in drive->id;
+ *
+ * Not all interface types support tuning, and not all of those
+ * support all possible PIO settings. They may silently ignore
+ * or round values as they see fit.
+ */
+typedef void (ide_tuneproc_t) (ide_drive_t *, byte);
+typedef int (ide_speedproc_t) (ide_drive_t *, byte);
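+
+/*
+ * Illustrative sketch (a skeleton): a tuneproc honouring the "255 means
+ * auto-tune" rule described above; the fallback mode chosen here is
+ * arbitrary.
+ */
+#if 0
+static void example_tuneproc (ide_drive_t *drive, byte pio)
+{
+    if (pio == 255)
+        pio = 2;    /* auto-tune: derive a mode from drive->id instead */
+    if (pio > 4)
+        pio = 4;    /* silently round down, as the comment above allows */
+    /* ... program the interface timing registers for mode 'pio' ... */
+}
+#endif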
+
+/*
+ * This is used to provide support for strange interfaces
+ */
+typedef void (ide_selectproc_t) (ide_drive_t *);
+typedef void (ide_resetproc_t) (ide_drive_t *);
+typedef int (ide_quirkproc_t) (ide_drive_t *);
+typedef void (ide_intrproc_t) (ide_drive_t *);
+typedef void (ide_maskproc_t) (ide_drive_t *, int);
+typedef void (ide_rw_proc_t) (ide_drive_t *, ide_dma_action_t);
+
+/*
+ * ide soft-power support
+ */
+typedef int (ide_busproc_t) (ide_drive_t *, int);
+
+#define IDE_CHIPSET_PCI_MASK \
+ ((1<<ide_pci)|(1<<ide_cmd646)|(1<<ide_ali14xx))
+#define IDE_CHIPSET_IS_PCI(c) ((IDE_CHIPSET_PCI_MASK >> (c)) & 1)
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+typedef struct ide_pci_devid_s {
+ unsigned short vid;
+ unsigned short did;
+} ide_pci_devid_t;
+
+#define IDE_PCI_DEVID_NULL ((ide_pci_devid_t){0,0})
+#define IDE_PCI_DEVID_EQ(a,b) (a.vid == b.vid && a.did == b.did)
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+
+typedef struct hwif_s {
+ struct hwif_s *next; /* for linked-list in ide_hwgroup_t */
+ void *hwgroup; /* actually (ide_hwgroup_t *) */
+ ide_ioreg_t io_ports[IDE_NR_PORTS]; /* task file registers */
+ hw_regs_t hw; /* Hardware info */
+ ide_drive_t drives[MAX_DRIVES]; /* drive info */
+ struct gendisk *gd; /* gendisk structure */
+ ide_tuneproc_t *tuneproc; /* routine to tune PIO mode for drives */
+ ide_speedproc_t *speedproc; /* routine to retune DMA modes for drives */
+ ide_selectproc_t *selectproc; /* tweaks hardware to select drive */
+ ide_resetproc_t *resetproc; /* routine to reset controller after a disk reset */
+ ide_intrproc_t *intrproc; /* special interrupt handling for shared pci interrupts */
+ ide_maskproc_t *maskproc; /* special host masking for drive selection */
+ ide_quirkproc_t *quirkproc; /* check host's drive quirk list */
+ ide_rw_proc_t *rwproc; /* adjust timing based upon rq->cmd direction */
+ ide_ideproc_t *ideproc; /* CPU-polled transfer routine */
+ ide_dmaproc_t *dmaproc; /* dma read/write/abort routine */
+ unsigned int *dmatable_cpu; /* dma physical region descriptor table (cpu view) */
+ dma_addr_t dmatable_dma; /* dma physical region descriptor table (dma view) */
+ struct scatterlist *sg_table; /* Scatter-gather list used to build the above */
+ int sg_nents; /* Current number of entries in it */
+ int sg_dma_direction; /* dma transfer direction */
+ int sg_dma_active; /* is it in use */
+ struct hwif_s *mate; /* other hwif from same PCI chip */
+ unsigned long dma_base; /* base addr for dma ports */
+ unsigned dma_extra; /* extra addr for dma ports */
+ unsigned long config_data; /* for use by chipset-specific code */
+ unsigned long select_data; /* for use by chipset-specific code */
+ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */
+ int irq; /* our irq number */
+ byte major; /* our major number */
+ char name[6]; /* name of interface, eg. "ide0" */
+ byte index; /* 0 for ide0; 1 for ide1; ... */
+ hwif_chipset_t chipset; /* sub-module for tuning.. */
+ unsigned noprobe : 1; /* don't probe for this interface */
+ unsigned present : 1; /* this interface exists */
+ unsigned serialized : 1; /* serialized operation with mate hwif */
+ unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */
+ unsigned reset : 1; /* reset after probe */
+ unsigned autodma : 1; /* automatically try to enable DMA at boot */
+ unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */
+ byte channel; /* for dual-port chips: 0=primary, 1=secondary */
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ struct pci_dev *pci_dev; /* for pci chipsets */
+ ide_pci_devid_t pci_devid; /* for pci chipsets: {VID,DID} */
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+#if (DISK_RECOVERY_TIME > 0)
+ unsigned long last_time; /* time when previous rq was done */
+#endif
+ byte straight8; /* Alan's straight 8 check */
+ void *hwif_data; /* extra hwif data */
+ ide_busproc_t *busproc; /* driver soft-power interface */
+ byte bus_state; /* power state of the IDE bus */
+} ide_hwif_t;
+
+/*
+ * Status returned from various ide_ functions
+ */
+typedef enum {
+ ide_stopped, /* no drive operation was started */
+ ide_started /* a drive operation was started, and a handler was set */
+} ide_startstop_t;
+
+/*
+ * internal ide interrupt handler type
+ */
+typedef ide_startstop_t (ide_pre_handler_t)(ide_drive_t *, struct request *);
+typedef ide_startstop_t (ide_handler_t)(ide_drive_t *);
+typedef ide_startstop_t (ide_post_handler_t)(ide_drive_t *);
+
+/*
+ * when ide_timer_expiry fires, invoke a handler of this type
+ * to decide what to do.
+ */
+typedef int (ide_expiry_t)(ide_drive_t *);
+
+typedef struct hwgroup_s {
+ ide_handler_t *handler;/* irq handler, if active */
+ volatile int busy; /* BOOL: protects all fields below */
+ int sleeping; /* BOOL: wake us up on timer expiry */
+ ide_drive_t *drive; /* current drive */
+ ide_hwif_t *hwif; /* ptr to current hwif in linked-list */
+ struct request *rq; /* current request */
+ struct timer_list timer; /* failsafe timer */
+ struct request wrq; /* local copy of current write rq */
+ unsigned long poll_timeout; /* timeout value during long polls */
+ ide_expiry_t *expiry; /* queried upon timeouts */
+} ide_hwgroup_t;
+
+/* structure attached to the request for IDE_TASK_CMDS */
+
+/*
+ * configurable drive settings
+ */
+
+#define TYPE_INT 0
+#define TYPE_INTA 1
+#define TYPE_BYTE 2
+#define TYPE_SHORT 3
+
+#define SETTING_READ (1 << 0)
+#define SETTING_WRITE (1 << 1)
+#define SETTING_RW (SETTING_READ | SETTING_WRITE)
+
+typedef int (ide_procset_t)(ide_drive_t *, int);
+typedef struct ide_settings_s {
+ char *name;
+ int rw;
+ int read_ioctl;
+ int write_ioctl;
+ int data_type;
+ int min;
+ int max;
+ int mul_factor;
+ int div_factor;
+ void *data;
+ ide_procset_t *set;
+ int auto_remove;
+ struct ide_settings_s *next;
+} ide_settings_t;
+
+void ide_add_setting(ide_drive_t *drive, const char *name, int rw, int read_ioctl, int write_ioctl, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set);
+void ide_remove_setting(ide_drive_t *drive, char *name);
+ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name);
+int ide_read_setting(ide_drive_t *t, ide_settings_t *setting);
+int ide_write_setting(ide_drive_t *drive, ide_settings_t *setting, int val);
+void ide_add_generic_settings(ide_drive_t *drive);
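+
+/*
+ * Illustrative sketch (an assumption about typical arguments): exporting
+ * drive->wcache as a read/write byte setting tied to the
+ * HDIO_GET_WCACHE/HDIO_SET_WCACHE ioctls, with no custom set routine.
+ */
+#if 0
+    ide_add_setting(drive, "wcache", SETTING_RW,
+                    HDIO_GET_WCACHE, HDIO_SET_WCACHE,
+                    TYPE_BYTE, 0, 1, 1, 1,
+                    &drive->wcache, NULL);
+#endif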
+
+#if 0
+/*
+ * /proc/ide interface
+ */
+typedef struct {
+ const char *name;
+ mode_t mode;
+ read_proc_t *read_proc;
+ write_proc_t *write_proc;
+} ide_proc_entry_t;
+#endif
+
+#ifdef CONFIG_PROC_FS
+void proc_ide_create(void);
+void proc_ide_destroy(void);
+void recreate_proc_ide_device(ide_hwif_t *, ide_drive_t *);
+void destroy_proc_ide_device(ide_hwif_t *, ide_drive_t *);
+void destroy_proc_ide_drives(ide_hwif_t *);
+void create_proc_ide_interfaces(void);
+void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void *data);
+void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p);
+read_proc_t proc_ide_read_capacity;
+read_proc_t proc_ide_read_geometry;
+
+/*
+ * Standard exit stuff:
+ */
+#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) \
+{ \
+ len -= off; \
+ if (len < count) { \
+ *eof = 1; \
+ if (len <= 0) \
+ return 0; \
+ } else \
+ len = count; \
+ *start = page + off; \
+ return len; \
+}
+#else
+#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0;
+#endif
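+
+/*
+ * Illustrative sketch: a minimal read_proc ending with the standard exit
+ * macro above ('example_read_proc' is hypothetical).
+ */
+#if 0
+static int example_read_proc (char *page, char **start, off_t off,
+                              int count, int *eof, void *data)
+{
+    int len = sprintf(page, "example\n");
+    PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
+}
+#endif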
+
+/*
+ * Subdrivers support.
+ */
+#define IDE_SUBDRIVER_VERSION 1
+
+typedef int (ide_cleanup_proc)(ide_drive_t *);
+typedef int (ide_standby_proc)(ide_drive_t *);
+typedef int (ide_flushcache_proc)(ide_drive_t *);
+typedef ide_startstop_t (ide_do_request_proc)(ide_drive_t *, struct request *, unsigned long);
+typedef void (ide_end_request_proc)(byte, ide_hwgroup_t *);
+typedef int (ide_ioctl_proc)(ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long);
+typedef int (ide_open_proc)(struct inode *, struct file *, ide_drive_t *);
+typedef void (ide_release_proc)(struct inode *, struct file *, ide_drive_t *);
+typedef int (ide_check_media_change_proc)(ide_drive_t *);
+typedef void (ide_revalidate_proc)(ide_drive_t *);
+typedef void (ide_pre_reset_proc)(ide_drive_t *);
+typedef unsigned long (ide_capacity_proc)(ide_drive_t *);
+typedef ide_startstop_t (ide_special_proc)(ide_drive_t *);
+typedef void (ide_setting_proc)(ide_drive_t *);
+typedef int (ide_reinit_proc)(ide_drive_t *);
+typedef void (ata_prebuilder_proc)(ide_drive_t *);
+typedef void (atapi_prebuilder_proc)(ide_drive_t *);
+
+typedef struct ide_driver_s {
+ const char *name;
+ const char *version;
+ byte media;
+ unsigned busy : 1;
+ unsigned supports_dma : 1;
+ unsigned supports_dsc_overlap : 1;
+ ide_cleanup_proc *cleanup;
+ ide_standby_proc *standby;
+ ide_flushcache_proc *flushcache;
+ ide_do_request_proc *do_request;
+ ide_end_request_proc *end_request;
+ ide_ioctl_proc *ioctl;
+ ide_open_proc *open;
+ ide_release_proc *release;
+ ide_check_media_change_proc *media_change;
+ ide_revalidate_proc *revalidate;
+ ide_pre_reset_proc *pre_reset;
+ ide_capacity_proc *capacity;
+ ide_special_proc *special;
+ /*ide_proc_entry_t *proc;*/
+ ide_reinit_proc *reinit;
+ ata_prebuilder_proc *ata_prebuilder;
+ atapi_prebuilder_proc *atapi_prebuilder;
+} ide_driver_t;
+
+#define DRIVER(drive) ((ide_driver_t *)((drive)->driver))
+
+/*
+ * IDE modules.
+ */
+#define IDE_CHIPSET_MODULE 0 /* not supported yet */
+#define IDE_PROBE_MODULE 1
+#define IDE_DRIVER_MODULE 2
+
+typedef int (ide_module_init_proc)(void);
+
+typedef struct ide_module_s {
+ int type;
+ ide_module_init_proc *init;
+ void *info;
+ struct ide_module_s *next;
+} ide_module_t;
+
+/*
+ * ide_hwifs[] is the master data structure used to keep track
+ * of just about everything in ide.c. Whenever possible, routines
+ * should be using pointers to a drive (ide_drive_t *) or
+ * pointers to a hwif (ide_hwif_t *), rather than indexing this
+ * structure directly (the allocation/layout may change!).
+ *
+ */
+#ifndef _IDE_C
+extern ide_hwif_t ide_hwifs[]; /* master data repository */
+extern ide_module_t *ide_modules;
+extern ide_module_t *ide_probe;
+#endif
+extern int noautodma;
+
+/*
+ * We need blk.h, but we replace its end_request with our own version.
+ */
+#define IDE_DRIVER /* Toggle some magic bits in blk.h */
+#define LOCAL_END_REQUEST /* Don't generate end_request in blk.h */
+#include <xeno/blk.h>
+
+void ide_end_request(byte uptodate, ide_hwgroup_t *hwgroup);
+
+/*
+ * This is used for (nearly) all data transfers from/to the IDE interface.
+ * FIXME for 2.5: pass a pointer rather than memcpy'ing.
+ */
+void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount);
+void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount);
+
+/*
+ * This is used for (nearly) all ATAPI data transfers from/to the IDE interface.
+ * FIXME for 2.5: pass a pointer rather than memcpy'ing.
+ */
+void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount);
+void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount);
+
+int drive_is_ready (ide_drive_t *drive);
+
+/*
+ * This is used on exit from the driver, to designate the next irq handler
+ * and also to start the safety timer.
+ */
+void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry);
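+
+/*
+ * Illustrative sketch (assumed usage): a step routine arms the next
+ * handler and the safety timer, kicks the drive, and reports that an
+ * operation is in flight.  'example_intr' is a hypothetical handler.
+ */
+#if 0
+static ide_startstop_t example_step (ide_drive_t *drive)
+{
+    ide_set_handler(drive, &example_intr, WAIT_CMD, NULL);
+    OUT_BYTE(WIN_IDENTIFY, IDE_COMMAND_REG);
+    return ide_started;
+}
+#endif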
+
+/*
+ * Error reporting, in human readable form (luxurious, but a memory hog).
+ */
+byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat);
+
+/*
+ * ide_error() takes action based on the error returned by the controller.
+ * The caller should return immediately after invoking this.
+ */
+ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
+
+/*
+ * Issue a simple drive command
+ * The drive must be selected beforehand.
+ */
+void ide_cmd (ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler);
+
+/*
+ * ide_fixstring() cleans up and (optionally) byte-swaps a text string,
+ * removing leading/trailing blanks and compressing internal blanks.
+ * It is primarily used to tidy up the model name/number fields as
+ * returned by the WIN_[P]IDENTIFY commands.
+ */
+void ide_fixstring (byte *s, const int bytecount, const int byteswap);
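+
+/*
+ * Illustrative sketch (mirroring typical identify handling): tidying the
+ * model field, byte-swapped or not per drive->bswap.
+ */
+#if 0
+    ide_fixstring(drive->id->model, sizeof(drive->id->model), drive->bswap);
+#endif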
+
+/*
+ * This routine busy-waits for the drive status to be not "busy".
+ * It then checks the status for all of the "good" bits and none
+ * of the "bad" bits, and if all is okay it returns 0. All other
+ * cases return 1 after doing "*startstop = ide_error()", and the
+ * caller should return the updated value of "startstop" in this case.
+ * "startstop" is unchanged when the function returns 0;
+ */
+int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, byte good, byte bad, unsigned long timeout);
+
+int ide_wait_noerr (ide_drive_t *drive, byte good, byte bad, unsigned long timeout);
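+
+/*
+ * Illustrative sketch of the ide_wait_stat() calling convention described
+ * above; 'drive' is assumed to be in scope.
+ */
+#if 0
+    ide_startstop_t startstop;
+
+    if (ide_wait_stat(&startstop, drive, DRIVE_READY, BAD_STAT, WAIT_READY))
+        return startstop;   /* ide_error() has already been invoked */
+    /* status was good; continue with the operation */
+#endif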
+
+/*
+ * This routine is called from the partition-table code in genhd.c
+ * to "convert" a drive to a logical geometry with fewer than 1024 cyls.
+ */
+int ide_xlate_1024 (kdev_t, int, int, const char *);
+
+/*
+ * Convert kdev_t structure into ide_drive_t * one.
+ */
+ide_drive_t *get_info_ptr (kdev_t i_rdev);
+
+/*
+ * Return the current idea about the total capacity of this drive.
+ */
+unsigned long current_capacity (ide_drive_t *drive);
+
+/*
+ * Start a reset operation for an IDE interface.
+ * The caller should return immediately after invoking this.
+ */
+ide_startstop_t ide_do_reset (ide_drive_t *);
+
+/*
+ * Re-Start an operation for an IDE interface.
+ * The caller should return immediately after invoking this.
+ */
+ide_startstop_t restart_request (ide_drive_t *);
+
+/*
+ * This function is intended to be used prior to invoking ide_do_drive_cmd().
+ */
+void ide_init_drive_cmd (struct request *rq);
+
+/*
+ * "action" parameter type for ide_do_drive_cmd() below.
+ */
+typedef enum {
+ ide_wait, /* insert rq at end of list, and wait for it */
+ ide_next, /* insert rq immediately after current request */
+ ide_preempt, /* insert rq in front of current request */
+ ide_end /* insert rq at end of list, but don't wait for it */
+} ide_action_t;
+
+/*
+ * This function issues a special IDE device request
+ * onto the request queue.
+ *
+ * If action is ide_wait, then the rq is queued at the end of the
+ * request queue, and the function sleeps until it has been processed.
+ * This is for use when invoked from an ioctl handler.
+ *
+ * If action is ide_preempt, then the rq is queued at the head of
+ * the request queue, displacing the currently-being-processed
+ * request and this function returns immediately without waiting
+ * for the new rq to be completed. This is VERY DANGEROUS, and is
+ * intended for careful use by the ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_next, then the rq is queued immediately after
+ * the currently-being-processed-request (if any), and the function
+ * returns without waiting for the new rq to be completed. As above,
+ * This is VERY DANGEROUS, and is intended for careful use by the
+ * ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_end, then the rq is queued at the end of the
+ * request queue, and the function returns immediately without waiting
+ * for the new rq to be completed. This is again intended for careful
+ * use by the ATAPI tape/cdrom driver code.
+ */
+int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action);
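+
+/*
+ * Illustrative sketch (assumed usage from process context, per the
+ * ide_wait case above).  'buf' is a hypothetical data buffer.
+ */
+#if 0
+    struct request rq;
+
+    ide_init_drive_cmd(&rq);
+    rq.buffer = buf;
+    if (ide_do_drive_cmd(drive, &rq, ide_wait))
+        /* the request failed */ ;
+#endif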
+
+/*
+ * Clean up after success/failure of an explicit drive cmd.
+ * stat/err are used only when HWGROUP(drive)->rq->cmd is IDE_DRIVE_CMD
+ * or matches IDE_DRIVE_TASK_MASK.
+ */
+void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err);
+
+/*
+ * Issue an ATA command and wait for completion. Use for implementing
+ * commands in the kernel.
+ */
+int ide_wait_cmd (ide_drive_t *drive, int cmd, int nsect, int feature, int sectors, byte *buf);
+
+int ide_wait_cmd_task (ide_drive_t *drive, byte *buf);
+
+typedef struct ide_task_s {
+ task_ioreg_t tfRegister[8];
+ task_ioreg_t hobRegister[8];
+ ide_reg_valid_t tf_out_flags;
+ ide_reg_valid_t tf_in_flags;
+ int data_phase;
+ int command_type;
+ ide_pre_handler_t *prehandler;
+ ide_handler_t *handler;
+ ide_post_handler_t *posthandler;
+ void *special; /* valid_t generally */
+ struct request *rq; /* copy of request */
+ unsigned long block; /* copy of block */
+} ide_task_t;
+
+typedef struct pkt_task_s {
+ task_ioreg_t tfRegister[8];
+ int data_phase;
+ int command_type;
+ ide_handler_t *handler;
+ void *special;
+ struct request *rq; /* copy of request */
+ unsigned long block; /* copy of block */
+} pkt_task_t;
+
+/*
+ * Taskfile I/O (disks only, for now).
+ */
+ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task);
+
+/*
+ * Builds a request from ide_ioctl.
+ */
+void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler);
+
+/*
+ * Special Flagged Register Validation Caller
+ */
+// ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task);
+
+ide_startstop_t set_multmode_intr (ide_drive_t *drive);
+ide_startstop_t set_geometry_intr (ide_drive_t *drive);
+ide_startstop_t recal_intr (ide_drive_t *drive);
+ide_startstop_t task_no_data_intr (ide_drive_t *drive);
+ide_startstop_t task_in_intr (ide_drive_t *drive);
+ide_startstop_t task_mulin_intr (ide_drive_t *drive);
+ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq);
+ide_startstop_t task_out_intr (ide_drive_t *drive);
+ide_startstop_t task_mulout_intr (ide_drive_t *drive);
+void ide_init_drive_taskfile (struct request *rq);
+
+int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf);
+
+int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *cmd, byte *buf);
+
+ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile);
+ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile);
+/* Expects args to be a full set of TF registers; parses the command type */
+int ide_cmd_type_parser (ide_task_t *args);
+
+int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
+
+#ifdef CONFIG_PKT_TASK_IOCTL
+int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
+#endif /* CONFIG_PKT_TASK_IOCTL */
+
+void ide_delay_50ms (void);
+int system_bus_clock(void);
+
+byte ide_auto_reduce_xfer (ide_drive_t *drive);
+int ide_driveid_update (ide_drive_t *drive);
+int ide_ata66_check (ide_drive_t *drive, ide_task_t *args);
+int ide_config_drive_speed (ide_drive_t *drive, byte speed);
+byte eighty_ninty_three (ide_drive_t *drive);
+int set_transfer (ide_drive_t *drive, ide_task_t *args);
+
+/*
+ * ide_system_bus_speed() returns what we think is the system VESA/PCI
+ * bus speed (in MHz). This is used for calculating interface PIO timings.
+ * The default is 40 for known PCI systems, 50 otherwise.
+ * The "idebus=xx" parameter can be used to override this value.
+ */
+int ide_system_bus_speed (void);
+
+/*
+ * ide_multwrite() transfers a block of up to mcount sectors of data
+ * to a drive as part of a disk multwrite operation.
+ */
+int ide_multwrite (ide_drive_t *drive, unsigned int mcount);
+
+/*
+ * ide_stall_queue() can be used by a drive to give excess bandwidth back
+ * to the hwgroup by sleeping for timeout jiffies.
+ */
+void ide_stall_queue (ide_drive_t *drive, unsigned long timeout);
+
+/*
+ * ide_get_queue() returns the queue which corresponds to a given device.
+ */
+request_queue_t *ide_get_queue (kdev_t dev);
+
+/*
+ * CompactFlash cards and their brethren pretend to be removable hard disks,
+ * but they never have a slave unit, and they don't have doorlock mechanisms.
+ * This test catches them, and is invoked elsewhere when setting appropriate config bits.
+ */
+int drive_is_flashcard (ide_drive_t *drive);
+
+int ide_spin_wait_hwgroup (ide_drive_t *drive);
+void ide_timer_expiry (unsigned long data);
+void ide_intr (int irq, void *dev_id, struct pt_regs *regs);
+void do_ide_request (request_queue_t * q);
+void ide_init_subdrivers (void);
+
+#ifndef _IDE_C
+extern struct block_device_operations ide_fops[];
+/*extern ide_proc_entry_t generic_subdriver_entries[];*/
+#endif
+
+int ide_reinit_drive (ide_drive_t *drive);
+
+#ifdef _IDE_C
+#ifdef CONFIG_BLK_DEV_IDE
+int ideprobe_init (void);
+#endif /* CONFIG_BLK_DEV_IDE */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+int idedisk_reinit (ide_drive_t *drive);
+int idedisk_init (void);
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDECD
+int ide_cdrom_reinit (ide_drive_t *drive);
+int ide_cdrom_init (void);
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+int idetape_reinit (ide_drive_t *drive);
+int idetape_init (void);
+#endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+int idefloppy_reinit (ide_drive_t *drive);
+int idefloppy_init (void);
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDESCSI
+int idescsi_reinit (ide_drive_t *drive);
+int idescsi_init (void);
+#endif /* CONFIG_BLK_DEV_IDESCSI */
+#endif /* _IDE_C */
+
+int ide_register_module (ide_module_t *module);
+void ide_unregister_module (ide_module_t *module);
+ide_drive_t *ide_scan_devices (byte media, const char *name, ide_driver_t *driver, int n);
+int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version);
+int ide_unregister_subdriver (ide_drive_t *drive);
+int ide_replace_subdriver(ide_drive_t *drive, const char *driver);
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+#define ON_BOARD 1
+#define NEVER_BOARD 0
+#ifdef CONFIG_BLK_DEV_OFFBOARD
+# define OFF_BOARD ON_BOARD
+#else /* CONFIG_BLK_DEV_OFFBOARD */
+# define OFF_BOARD NEVER_BOARD
+#endif /* CONFIG_BLK_DEV_OFFBOARD */
+
+unsigned long ide_find_free_region (unsigned short size) __init;
+void ide_scan_pcibus (int scan_direction) __init;
+#endif
+#ifdef CONFIG_BLK_DEV_IDEDMA
+#define BAD_DMA_DRIVE 0
+#define GOOD_DMA_DRIVE 1
+int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func);
+void ide_destroy_dmatable (ide_drive_t *drive);
+ide_startstop_t ide_dma_intr (ide_drive_t *drive);
+int check_drive_lists (ide_drive_t *drive, int good_bad);
+int report_drive_dmaing (ide_drive_t *drive);
+int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive);
+int ide_release_dma (ide_hwif_t *hwif);
+void ide_setup_dma (ide_hwif_t *hwif, unsigned long dmabase, unsigned int num_ports) __init;
+unsigned long ide_get_or_set_dma_base (ide_hwif_t *hwif, int extra, const char *name) __init;
+#endif
+
+void hwif_unregister (ide_hwif_t *hwif);
+
+void export_ide_init_queue (ide_drive_t *drive);
+byte export_probe_for_drive (ide_drive_t *drive);
+
+#endif /* _IDE_H */
diff --git a/xen/include/xeno/if.h b/xen/include/xeno/if.h
new file mode 100644
index 0000000000..8d3fc2b7fc
--- /dev/null
+++ b/xen/include/xeno/if.h
@@ -0,0 +1,141 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Global definitions for the INET interface module.
+ *
+ * Version: @(#)if.h 1.0.2 04/18/93
+ *
+ * Authors: Original taken from Berkeley UNIX 4.3, (c) UCB 1982-1988
+ * Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_IF_H
+#define _LINUX_IF_H
+
+#include <linux/types.h> /* for "__kernel_caddr_t" et al */
+#include <linux/socket.h> /* for "struct sockaddr" et al */
+
+/* Standard interface flags (netdevice->flags). */
+#define IFF_UP 0x1 /* interface is up */
+#define IFF_BROADCAST 0x2 /* broadcast address valid */
+#define IFF_DEBUG 0x4 /* turn on debugging */
+#define IFF_LOOPBACK 0x8 /* is a loopback net */
+#define IFF_POINTOPOINT 0x10 /* interface has p-p link */
+#define IFF_NOTRAILERS 0x20 /* avoid use of trailers */
+#define IFF_RUNNING 0x40 /* resources allocated */
+#define IFF_NOARP 0x80 /* no ARP protocol */
+#define IFF_PROMISC 0x100 /* receive all packets */
+#define IFF_ALLMULTI 0x200 /* receive all multicast packets*/
+
+#define IFF_MASTER 0x400 /* master of a load balancer */
+#define IFF_SLAVE 0x800 /* slave of a load balancer */
+
+#define IFF_MULTICAST 0x1000 /* Supports multicast */
+
+#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_MASTER|IFF_SLAVE|IFF_RUNNING)
+
+#define IFF_PORTSEL 0x2000 /* can set media type */
+#define IFF_AUTOMEDIA 0x4000 /* auto media select active */
+#define IFF_DYNAMIC 0x8000 /* dialup device with changing addresses*/
+
+/* Private (from user) interface flags (netdevice->priv_flags). */
+#define IFF_802_1Q_VLAN 0x1 /* 802.1Q VLAN device. */
+
+/*
+ * Device mapping structure. I'd just gone off and designed a
+ * beautiful scheme using only loadable modules with arguments
+ * for driver options and along come the PCMCIA people 8)
+ *
+ * Ah well. The get() side of this is good for WDSETUP, and it'll
+ * be handy for debugging things. The set side is fine for now and
+ * being very small might be worth keeping for clean configuration.
+ */
+
+struct ifmap
+{
+ unsigned long mem_start;
+ unsigned long mem_end;
+ unsigned short base_addr;
+ unsigned char irq;
+ unsigned char dma;
+ unsigned char port;
+ /* 3 bytes spare */
+};
+
+/*
+ * Interface request structure used for socket
+ * ioctl's. All interface ioctl's must have parameter
+ * definitions which begin with ifr_name. The
+ * remainder may be interface specific.
+ */
+
+struct ifreq
+{
+#define IFHWADDRLEN 6
+#define IFNAMSIZ 16
+ union
+ {
+ char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ } ifr_ifrn;
+
+ union {
+ struct sockaddr ifru_addr;
+ struct sockaddr ifru_dstaddr;
+ struct sockaddr ifru_broadaddr;
+ struct sockaddr ifru_netmask;
+ struct sockaddr ifru_hwaddr;
+ short ifru_flags;
+ int ifru_ivalue;
+ int ifru_mtu;
+ struct ifmap ifru_map;
+ char ifru_slave[IFNAMSIZ]; /* Just fits the size */
+ char ifru_newname[IFNAMSIZ];
+ char * ifru_data;
+ } ifr_ifru;
+};
+
+#define ifr_name ifr_ifrn.ifrn_name /* interface name */
+#define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */
+#define ifr_addr ifr_ifru.ifru_addr /* address */
+#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-p lnk */
+#define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */
+#define ifr_netmask ifr_ifru.ifru_netmask /* interface net mask */
+#define ifr_flags ifr_ifru.ifru_flags /* flags */
+#define ifr_metric ifr_ifru.ifru_ivalue /* metric */
+#define ifr_mtu ifr_ifru.ifru_mtu /* mtu */
+#define ifr_map ifr_ifru.ifru_map /* device map */
+#define ifr_slave ifr_ifru.ifru_slave /* slave device */
+#define ifr_data ifr_ifru.ifru_data /* for use by interface */
+#define ifr_ifindex ifr_ifru.ifru_ivalue /* interface index */
+#define ifr_bandwidth ifr_ifru.ifru_ivalue /* link bandwidth */
+#define ifr_qlen ifr_ifru.ifru_ivalue /* Queue length */
+#define ifr_newname ifr_ifru.ifru_newname /* New name */
+
+/*
+ * Structure used in SIOCGIFCONF request.
+ * Used to retrieve interface configuration
+ * for machine (useful for programs which
+ * must know all networks accessible).
+ */
+
+struct ifconf
+{
+ int ifc_len; /* size of buffer */
+ union
+ {
+ char * ifcu_buf;
+ struct ifreq *ifcu_req;
+ } ifc_ifcu;
+};
+#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */
+#define ifc_req ifc_ifcu.ifcu_req /* array of structures */
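+
+/*
+ * Illustrative userland use of SIOCGIFCONF (assumes a datagram
+ * socket fd; SIOCGIFCONF itself lives in sockios.h):
+ *
+ *	struct ifreq reqs[8];
+ *	struct ifconf ifc;
+ *	int n;
+ *
+ *	ifc.ifc_len = sizeof(reqs);
+ *	ifc.ifc_req = reqs;
+ *	if (ioctl(fd, SIOCGIFCONF, &ifc) == 0)
+ *		n = ifc.ifc_len / sizeof(struct ifreq);
+ */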
+
+
+#endif /* _LINUX_IF_H */
diff --git a/xen/include/xeno/if_ether.h b/xen/include/xeno/if_ether.h
new file mode 100644
index 0000000000..b64559d713
--- /dev/null
+++ b/xen/include/xeno/if_ether.h
@@ -0,0 +1,100 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Global definitions for the Ethernet IEEE 802.3 interface.
+ *
+ * Version: @(#)if_ether.h 1.0.1a 02/08/94
+ *
+ * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Donald Becker, <becker@super.org>
+ * Alan Cox, <alan@redhat.com>
+ * Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_IF_ETHER_H
+#define _LINUX_IF_ETHER_H
+
+/*
+ * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
+ * and FCS/CRC (frame check sequence).
+ */
+
+#define ETH_ALEN 6 /* Octets in one ethernet addr */
+#define ETH_HLEN 14 /* Total octets in header. */
+#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */
+#define ETH_DATA_LEN 1500 /* Max. octets in payload */
+#define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */
+
+/*
+ * These are the defined Ethernet Protocol ID's.
+ */
+
+#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */
+#define ETH_P_PUP 0x0200 /* Xerox PUP packet */
+#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */
+#define ETH_P_IP 0x0800 /* Internet Protocol packet */
+#define ETH_P_X25 0x0805 /* CCITT X.25 */
+#define ETH_P_ARP 0x0806 /* Address Resolution packet */
+#define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */
+#define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */
+#define ETH_P_DEC 0x6000 /* DEC Assigned proto */
+#define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */
+#define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */
+#define ETH_P_DNA_RT 0x6003 /* DEC DNA Routing */
+#define ETH_P_LAT 0x6004 /* DEC LAT */
+#define ETH_P_DIAG 0x6005 /* DEC Diagnostics */
+#define ETH_P_CUST 0x6006 /* DEC Customer use */
+#define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */
+#define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */
+#define ETH_P_ATALK 0x809B /* Appletalk DDP */
+#define ETH_P_AARP 0x80F3 /* Appletalk AARP */
+#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */
+#define ETH_P_IPX 0x8137 /* IPX over DIX */
+#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
+#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */
+#define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */
+#define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */
+#define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport
+ * over Ethernet
+ */
+
+/*
+ * Non DIX types. Won't clash for 1500 types.
+ */
+
+#define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */
+#define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */
+#define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */
+#define ETH_P_802_2 0x0004 /* 802.2 frames */
+#define ETH_P_SNAP 0x0005 /* Internal only */
+#define ETH_P_DDCMP 0x0006 /* DEC DDCMP: Internal only */
+#define ETH_P_WAN_PPP 0x0007 /* Dummy type for WAN PPP frames*/
+#define ETH_P_PPP_MP 0x0008 /* Dummy type for PPP MP frames */
+#define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */
+#define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/
+#define ETH_P_TR_802_2 0x0011 /* 802.2 frames */
+#define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */
+#define ETH_P_CONTROL 0x0016 /* Card specific control frames */
+#define ETH_P_IRDA 0x0017 /* Linux-IrDA */
+#define ETH_P_ECONET 0x0018 /* Acorn Econet */
+
+/*
+ * This is an Ethernet frame header.
+ */
+
+struct ethhdr
+{
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+ unsigned char h_source[ETH_ALEN]; /* source ether addr */
+ unsigned short h_proto; /* packet type ID field */
+};
+
+#endif /* _LINUX_IF_ETHER_H */
diff --git a/xen/include/xeno/if_packet.h b/xen/include/xeno/if_packet.h
new file mode 100644
index 0000000000..b92558549d
--- /dev/null
+++ b/xen/include/xeno/if_packet.h
@@ -0,0 +1,102 @@
+#ifndef __LINUX_IF_PACKET_H
+#define __LINUX_IF_PACKET_H
+
+struct sockaddr_pkt
+{
+ unsigned short spkt_family;
+ unsigned char spkt_device[14];
+ unsigned short spkt_protocol;
+};
+
+struct sockaddr_ll
+{
+ unsigned short sll_family;
+ unsigned short sll_protocol;
+ int sll_ifindex;
+ unsigned short sll_hatype;
+ unsigned char sll_pkttype;
+ unsigned char sll_halen;
+ unsigned char sll_addr[8];
+};
+
+/* Packet types */
+
+#define PACKET_HOST 0 /* To us */
+#define PACKET_BROADCAST 1 /* To all */
+#define PACKET_MULTICAST 2 /* To group */
+#define PACKET_OTHERHOST 3 /* To someone else */
+#define PACKET_OUTGOING 4 /* Outgoing of any type */
+/* These are invisible to user level */
+#define PACKET_LOOPBACK 5 /* MC/BRD frame looped back */
+#define PACKET_FASTROUTE 6 /* Fastrouted frame */
+
+/* Packet socket options */
+
+#define PACKET_ADD_MEMBERSHIP 1
+#define PACKET_DROP_MEMBERSHIP 2
+#define PACKET_RECV_OUTPUT 3
+/* Value 4 is still used by obsolete turbo-packet. */
+#define PACKET_RX_RING 5
+#define PACKET_STATISTICS 6
+#define PACKET_COPY_THRESH 7
+
+struct tpacket_stats
+{
+ unsigned int tp_packets;
+ unsigned int tp_drops;
+};
+
+struct tpacket_hdr
+{
+ unsigned long tp_status;
+#define TP_STATUS_KERNEL 0
+#define TP_STATUS_USER 1
+#define TP_STATUS_COPY 2
+#define TP_STATUS_LOSING 4
+#define TP_STATUS_CSUMNOTREADY 8
+ unsigned int tp_len;
+ unsigned int tp_snaplen;
+ unsigned short tp_mac;
+ unsigned short tp_net;
+ unsigned int tp_sec;
+ unsigned int tp_usec;
+};
+
+#define TPACKET_ALIGNMENT 16
+#define TPACKET_ALIGN(x) (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
+#define TPACKET_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
+
+/*
+ Frame structure:
+
+ - Start. Frame must be aligned to TPACKET_ALIGNMENT=16
+ - struct tpacket_hdr
+ - pad to TPACKET_ALIGNMENT=16
+ - struct sockaddr_ll
+ - Gap, chosen so that packet data (Start+tp_net) aligns to TPACKET_ALIGNMENT=16
+ - Start+tp_mac: [ Optional MAC header ]
+ - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16.
+ - Pad to align to TPACKET_ALIGNMENT=16
+ */
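+
+/*
+ * Worked example (illustrative; 32-bit i386 sizes assumed): with
+ * sizeof(struct tpacket_hdr) == 24, TPACKET_ALIGN(24) == 32, so
+ * TPACKET_HDRLEN == 32 + sizeof(struct sockaddr_ll) == 32 + 20 == 52;
+ * tp_net is then padded up to the next multiple of 16.
+ */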
+
+struct tpacket_req
+{
+ unsigned int tp_block_size; /* Minimal size of contiguous block */
+ unsigned int tp_block_nr; /* Number of blocks */
+ unsigned int tp_frame_size; /* Size of frame */
+ unsigned int tp_frame_nr; /* Total number of frames */
+};
+
+struct packet_mreq
+{
+ int mr_ifindex;
+ unsigned short mr_type;
+ unsigned short mr_alen;
+ unsigned char mr_address[8];
+};
+
+#define PACKET_MR_MULTICAST 0
+#define PACKET_MR_PROMISC 1
+#define PACKET_MR_ALLMULTI 2
+
+#endif
diff --git a/xen/include/xeno/if_vlan.h b/xen/include/xeno/if_vlan.h
new file mode 100644
index 0000000000..d3e96bc4cf
--- /dev/null
+++ b/xen/include/xeno/if_vlan.h
@@ -0,0 +1,256 @@
+/*
+ * VLAN An implementation of 802.1Q VLAN tagging.
+ *
+ * Authors: Ben Greear <greearb@candelatech.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef _LINUX_IF_VLAN_H_
+#define _LINUX_IF_VLAN_H_
+
+#ifdef __KERNEL__
+
+/* externally defined structs */
+struct vlan_group;
+struct net_device;
+struct sk_buff;
+struct packet_type;
+struct vlan_collection;
+struct vlan_dev_info;
+
+//#include <linux/proc_fs.h> /* for proc_dir_entry */
+#include <linux/netdevice.h>
+
+#define VLAN_HLEN 4 /* The additional bytes (on top of the Ethernet header)
+ * that VLAN requires.
+ */
+#define VLAN_ETH_ALEN 6 /* Octets in one ethernet addr */
+#define VLAN_ETH_HLEN 18 /* Total octets in header. */
+#define VLAN_ETH_ZLEN 64 /* Min. octets in frame sans FCS */
+
+/*
+ * According to 802.3ac, the packet can be 4 bytes longer. --Klika Jan
+ */
+#define VLAN_ETH_DATA_LEN 1500 /* Max. octets in payload */
+#define VLAN_ETH_FRAME_LEN 1518 /* Max. octets in frame sans FCS */
+
+struct vlan_ethhdr {
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+ unsigned char h_source[ETH_ALEN]; /* source ether addr */
+ unsigned short h_vlan_proto; /* Should always be 0x8100 */
+ unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */
+ unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */
+};
+
+struct vlan_hdr {
+ unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */
+ unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */
+};
+
+#define VLAN_VID_MASK 0xfff
+
+/* found in af_inet.c */
+extern int (*vlan_ioctl_hook)(unsigned long arg);
+
+#define VLAN_NAME "vlan"
+
+/* if this changes, the algorithm will have to be reworked because it
+ * depends on completely exhausting the VLAN identifier space. Thus
+ * it gives constant time look-up, but in many cases it wastes memory.
+ */
+#define VLAN_GROUP_ARRAY_LEN 4096
+
+struct vlan_group {
+ int real_dev_ifindex; /* The ifindex of the ethernet(like) device the vlan is attached to. */
+ struct net_device *vlan_devices[VLAN_GROUP_ARRAY_LEN];
+
+ struct vlan_group *next; /* the next in the list */
+};
+
+struct vlan_priority_tci_mapping {
+ unsigned long priority;
+ unsigned short vlan_qos; /* This should be shifted when first set, so we only do it
+ * at provisioning time.
+ * ((skb->priority << 13) & 0xE000)
+ */
+ struct vlan_priority_tci_mapping *next;
+};
+
+/* Holds information that makes sense if this device is a VLAN device. */
+struct vlan_dev_info {
+ /** This will be the mapping that correlates skb->priority to
+ * 3 bits of VLAN QOS tags...
+ */
+ unsigned long ingress_priority_map[8];
+ struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */
+
+ unsigned short vlan_id; /* The VLAN Identifier for this interface. */
+ unsigned short flags; /* (1 << 0) re_order_header This option will cause the
+ * VLAN code to move around the ethernet header on
+ * ingress to make the skb look **exactly** like it
+ * came in from an ethernet port. This destroys some of
+ * the VLAN information in the skb, but it fixes programs
+ * like DHCP that use packet-filtering and don't understand
+ * 802.1Q
+ */
+ struct dev_mc_list *old_mc_list; /* old multi-cast list for the VLAN interface..
+ * we save this so we can tell what changes were
+ * made, in order to feed the right changes down
+ * to the real hardware...
+ */
+ int old_allmulti; /* similar to above. */
+ int old_promiscuity; /* similar to above. */
+ struct net_device *real_dev; /* the underlying device/interface */
+ struct proc_dir_entry *dent; /* Holds the proc data */
+ unsigned long cnt_inc_headroom_on_tx; /* How many times did we have to grow the skb on TX. */
+ unsigned long cnt_encap_on_xmit; /* How many times did we have to encapsulate the skb on TX. */
+ struct net_device_stats dev_stats; /* Device stats (rx-bytes, tx-pkts, etc...) */
+};
+
+#define VLAN_DEV_INFO(x) ((struct vlan_dev_info *)(x->priv))
+
+/* inline functions */
+
+static inline struct net_device_stats *vlan_dev_get_stats(struct net_device *dev)
+{
+ return &(VLAN_DEV_INFO(dev)->dev_stats);
+}
+
+static inline __u32 vlan_get_ingress_priority(struct net_device *dev,
+ unsigned short vlan_tag)
+{
+ struct vlan_dev_info *vip = VLAN_DEV_INFO(dev);
+
+ return vip->ingress_priority_map[(vlan_tag >> 13) & 0x7];
+}
+
+/* VLAN tx hw acceleration helpers. */
+struct vlan_skb_tx_cookie {
+ u32 magic;
+ u32 vlan_tag;
+};
+
+#if 0
+#define VLAN_TX_COOKIE_MAGIC 0x564c414e /* "VLAN" in ascii. */
+#define VLAN_TX_SKB_CB(__skb) ((struct vlan_skb_tx_cookie *)&((__skb)->cb[0]))
+#define vlan_tx_tag_present(__skb) \
+ (VLAN_TX_SKB_CB(__skb)->magic == VLAN_TX_COOKIE_MAGIC)
+#define vlan_tx_tag_get(__skb) (VLAN_TX_SKB_CB(__skb)->vlan_tag)
+#else /* XXX KAF: We don't support vlan tagging at the moment. */
+#define VLAN_TX_SKB_CB(__skb) NULL
+#define vlan_tx_tag_present(__skb) 0
+#define vlan_tx_tag_get(__skb) 0
+#endif
+
+#if 0
+/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */
+static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
+ struct vlan_group *grp,
+ unsigned short vlan_tag, int polling)
+{
+ struct net_device_stats *stats;
+
+ skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK];
+ if (skb->dev == NULL) {
+ kfree_skb(skb);
+
+ /* Not NET_RX_DROP, this is not being dropped
+ * due to congestion.
+ */
+ return 0;
+ }
+
+ skb->dev->last_rx = jiffies;
+
+ stats = vlan_dev_get_stats(skb->dev);
+ stats->rx_packets++;
+ stats->rx_bytes += skb->len;
+
+ skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tag);
+ switch (skb->pkt_type) {
+ case PACKET_BROADCAST:
+ break;
+
+ case PACKET_MULTICAST:
+ stats->multicast++;
+ break;
+
+ case PACKET_OTHERHOST:
+ /* Our lower layer thinks this is not local, let's make sure.
+ * This allows the VLAN to have a different MAC than the underlying
+ * device, and still route correctly.
+ */
+ if (!memcmp(skb->mac.ethernet->h_dest, skb->dev->dev_addr, ETH_ALEN))
+ skb->pkt_type = PACKET_HOST;
+ break;
+ };
+
+#ifdef NAPI
+ return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+#else
+ return netif_rx(skb);
+#endif
+}
+
+static inline int vlan_hwaccel_rx(struct sk_buff *skb,
+ struct vlan_group *grp,
+ unsigned short vlan_tag)
+{
+ return __vlan_hwaccel_rx(skb, grp, vlan_tag, 0);
+}
+
+static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
+ struct vlan_group *grp,
+ unsigned short vlan_tag)
+{
+ return __vlan_hwaccel_rx(skb, grp, vlan_tag, 1);
+}
+#else
+#define vlan_hwaccel_rx(_skb, _grp, _tag) (netif_rx(_skb))
+#endif
+#endif /* __KERNEL__ */
+
+/* VLAN IOCTLs are found in sockios.h */
+
+/* Passed in vlan_ioctl_args structure to determine behaviour. */
+enum vlan_ioctl_cmds {
+ ADD_VLAN_CMD,
+ DEL_VLAN_CMD,
+ SET_VLAN_INGRESS_PRIORITY_CMD,
+ SET_VLAN_EGRESS_PRIORITY_CMD,
+ GET_VLAN_INGRESS_PRIORITY_CMD,
+ GET_VLAN_EGRESS_PRIORITY_CMD,
+ SET_VLAN_NAME_TYPE_CMD,
+ SET_VLAN_FLAG_CMD
+};
+
+enum vlan_name_types {
+ VLAN_NAME_TYPE_PLUS_VID, /* Name will look like: vlan0005 */
+ VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like: eth1.0005 */
+ VLAN_NAME_TYPE_PLUS_VID_NO_PAD, /* Name will look like: vlan5 */
+ VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD, /* Name will look like: eth0.5 */
+ VLAN_NAME_TYPE_HIGHEST
+};
+
+struct vlan_ioctl_args {
+ int cmd; /* Should be one of the vlan_ioctl_cmds enum above. */
+ char device1[24];
+
+ union {
+ char device2[24];
+ int VID;
+ unsigned int skb_priority;
+ unsigned int name_type;
+ unsigned int bind_type;
+ unsigned int flag; /* Matches vlan_dev_info flags */
+ } u;
+
+ short vlan_qos;
+};
+
+#endif /* !(_LINUX_IF_VLAN_H_) */
diff --git a/xen/include/xeno/in.h b/xen/include/xeno/in.h
new file mode 100644
index 0000000000..6c090e6e4f
--- /dev/null
+++ b/xen/include/xeno/in.h
@@ -0,0 +1,191 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions of the Internet Protocol.
+ *
+ * Version: @(#)in.h 1.0.1 04/21/93
+ *
+ * Authors: Original taken from the GNU Project <netinet/in.h> file.
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_IN_H
+#define _LINUX_IN_H
+
+#include <xeno/types.h>
+#include <xeno/socket.h>
+
+/* Standard well-defined IP protocols. */
+enum {
+ IPPROTO_IP = 0, /* Dummy protocol for TCP */
+ IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
+ IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
+ IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
+ IPPROTO_TCP = 6, /* Transmission Control Protocol */
+ IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
+ IPPROTO_PUP = 12, /* PUP protocol */
+ IPPROTO_UDP = 17, /* User Datagram Protocol */
+ IPPROTO_IDP = 22, /* XNS IDP protocol */
+ IPPROTO_RSVP = 46, /* RSVP protocol */
+ IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
+
+ IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
+
+ IPPROTO_PIM = 103, /* Protocol Independent Multicast */
+
+ IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
+ IPPROTO_AH = 51, /* Authentication Header protocol */
+ IPPROTO_COMP = 108, /* Compression Header protocol */
+
+ IPPROTO_RAW = 255, /* Raw IP packets */
+ IPPROTO_MAX
+};
+
+
+/* Internet address. */
+struct in_addr {
+ __u32 s_addr;
+};
+
+#define IP_TOS 1
+#define IP_TTL 2
+#define IP_HDRINCL 3
+#define IP_OPTIONS 4
+#define IP_ROUTER_ALERT 5
+#define IP_RECVOPTS 6
+#define IP_RETOPTS 7
+#define IP_PKTINFO 8
+#define IP_PKTOPTIONS 9
+#define IP_MTU_DISCOVER 10
+#define IP_RECVERR 11
+#define IP_RECVTTL 12
+#define IP_RECVTOS 13
+#define IP_MTU 14
+#define IP_FREEBIND 15
+
+/* BSD compatibility */
+#define IP_RECVRETOPTS IP_RETOPTS
+
+/* IP_MTU_DISCOVER values */
+#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
+#define IP_PMTUDISC_WANT 1 /* Use per route hints */
+#define IP_PMTUDISC_DO 2 /* Always DF */
+
+#define IP_MULTICAST_IF 32
+#define IP_MULTICAST_TTL 33
+#define IP_MULTICAST_LOOP 34
+#define IP_ADD_MEMBERSHIP 35
+#define IP_DROP_MEMBERSHIP 36
+
+/* These need to appear somewhere around here */
+#define IP_DEFAULT_MULTICAST_TTL 1
+#define IP_DEFAULT_MULTICAST_LOOP 1
+
+/* Request struct for multicast socket ops */
+
+struct ip_mreq
+{
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_interface; /* local IP address of interface */
+};
+
+struct ip_mreqn
+{
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_address; /* local IP address of interface */
+ int imr_ifindex; /* Interface index */
+};
+
+struct in_pktinfo
+{
+ int ipi_ifindex;
+ struct in_addr ipi_spec_dst;
+ struct in_addr ipi_addr;
+};
+
+/* Structure describing an Internet (IP) socket address. */
+#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */
+struct sockaddr_in {
+ sa_family_t sin_family; /* Address family */
+ unsigned short int sin_port; /* Port number */
+ struct in_addr sin_addr; /* Internet address */
+
+ /* Pad to size of `struct sockaddr'. */
+ unsigned char __pad[__SOCK_SIZE__ - sizeof(short int) -
+ sizeof(unsigned short int) - sizeof(struct in_addr)];
+};
+#define sin_zero __pad /* for BSD UNIX comp. -FvK */
+
+
+/*
+ * Definitions of the bits in an Internet address integer.
+ * On subnets, host and network parts are found according
+ * to the subnet mask, not these masks.
+ */
+#define IN_CLASSA(a) ((((long int) (a)) & 0x80000000) == 0)
+#define IN_CLASSA_NET 0xff000000
+#define IN_CLASSA_NSHIFT 24
+#define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET)
+#define IN_CLASSA_MAX 128
+
+#define IN_CLASSB(a) ((((long int) (a)) & 0xc0000000) == 0x80000000)
+#define IN_CLASSB_NET 0xffff0000
+#define IN_CLASSB_NSHIFT 16
+#define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET)
+#define IN_CLASSB_MAX 65536
+
+#define IN_CLASSC(a) ((((long int) (a)) & 0xe0000000) == 0xc0000000)
+#define IN_CLASSC_NET 0xffffff00
+#define IN_CLASSC_NSHIFT 8
+#define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET)
+
+#define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000)
+#define IN_MULTICAST(a) IN_CLASSD(a)
+#define IN_MULTICAST_NET 0xF0000000
+
+#define IN_EXPERIMENTAL(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000)
+#define IN_BADCLASS(a) IN_EXPERIMENTAL((a))
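+
+/*
+ * Example (illustrative): 10.1.2.3 is 0x0A010203 in host order, so
+ * IN_CLASSA(0x0A010203) is true and its network part is
+ * 0x0A010203 & IN_CLASSA_NET == 0x0A000000 (i.e. 10.0.0.0).
+ */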
+
+/* Address to accept any incoming messages. */
+#define INADDR_ANY ((unsigned long int) 0x00000000)
+
+/* Address to send to all hosts. */
+#define INADDR_BROADCAST ((unsigned long int) 0xffffffff)
+
+/* Address indicating an error return. */
+#define INADDR_NONE ((unsigned long int) 0xffffffff)
+
+/* Network number for local host loopback. */
+#define IN_LOOPBACKNET 127
+
+/* Address to loopback in software to local host. */
+#define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */
+#define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000)
+
+/* Defines for Multicast INADDR */
+#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */
+#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
+#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
+#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
+
+
+/* <asm/byteorder.h> contains the htonl type stuff.. */
+#include <asm/byteorder.h>
+
+#ifdef __KERNEL__
+/* Some random defines to make it easier in the kernel.. */
+#define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000))
+#define MULTICAST(x) (((x) & htonl(0xf0000000)) == htonl(0xe0000000))
+#define BADCLASS(x) (((x) & htonl(0xf0000000)) == htonl(0xf0000000))
+#define ZERONET(x) (((x) & htonl(0xff000000)) == htonl(0x00000000))
+#define LOCAL_MCAST(x) (((x) & htonl(0xFFFFFF00)) == htonl(0xE0000000))
+
+#endif
+
+#endif /* _LINUX_IN_H */
diff --git a/xen/include/xeno/init.h b/xen/include/xeno/init.h
new file mode 100644
index 0000000000..5c4477f18c
--- /dev/null
+++ b/xen/include/xeno/init.h
@@ -0,0 +1,170 @@
+#ifndef _LINUX_INIT_H
+#define _LINUX_INIT_H
+
+#include <linux/config.h>
+
+/* These macros are used to mark some functions or
+ * initialized data (doesn't apply to uninitialized data)
+ * as `initialization' functions. The kernel can take this as a hint
+ * that the function is used only during the initialization phase and
+ * can free the memory afterwards.
+ *
+ * Usage:
+ * For functions:
+ *
+ * You should add __init immediately before the function name, like:
+ *
+ * static void __init initme(int x, int y)
+ * {
+ * extern int z; z = x * y;
+ * }
+ *
+ * If the function has a prototype somewhere, you can also add
+ * __init between closing brace of the prototype and semicolon:
+ *
+ * extern int initialize_foobar_device(int, int, int) __init;
+ *
+ * For initialized data:
+ * You should insert __initdata between the variable name and equal
+ * sign followed by value, e.g.:
+ *
+ * static int init_variable __initdata = 0;
+ * static char linux_logo[] __initdata = { 0x32, 0x36, ... };
+ *
+ * Don't forget to initialize data not at file scope, i.e. within a function,
+ * as gcc otherwise puts the data into the bss section and not into the init
+ * section.
+ *
+ * Also note that this data cannot be "const".
+ */
+
+#ifndef MODULE
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used for initialization calls..
+ */
+typedef int (*initcall_t)(void);
+typedef void (*exitcall_t)(void);
+
+extern initcall_t __initcall_start, __initcall_end;
+
+#define __initcall(fn) \
+ static initcall_t __initcall_##fn __init_call = fn
+#define __exitcall(fn) \
+ static exitcall_t __exitcall_##fn __exit_call = fn
+
+/*
+ * Used for kernel command line parameter setup
+ */
+struct kernel_param {
+ const char *str;
+ int (*setup_func)(char *);
+};
+
+extern struct kernel_param __setup_start, __setup_end;
+
+#define __setup(str, fn) \
+ static char __setup_str_##fn[] __initdata = str; \
+ static struct kernel_param __setup_##fn __attribute__((unused)) __initsetup = { __setup_str_##fn, fn }
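+
+/*
+ * Illustrative use (hypothetical handler; assumes a writable global
+ * system_bus_speed): parse "idebus=xx" from the kernel command line.
+ *
+ *	static int __init idebus_setup(char *str)
+ *	{
+ *		system_bus_speed = simple_strtoul(str, NULL, 0);
+ *		return 1;
+ *	}
+ *	__setup("idebus=", idebus_setup);
+ */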
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Mark functions and data as being only used at initialization
+ * or exit time.
+ */
+#define __init __attribute__ ((__section__ (".text.init")))
+#define __exit __attribute__ ((unused, __section__(".text.exit")))
+#define __initdata __attribute__ ((__section__ (".data.init")))
+#define __exitdata __attribute__ ((unused, __section__ (".data.exit")))
+#define __initsetup __attribute__ ((unused,__section__ (".setup.init")))
+#define __init_call __attribute__ ((unused,__section__ (".initcall.init")))
+#define __exit_call __attribute__ ((unused,__section__ (".exitcall.exit")))
+
+/* For assembly routines */
+#define __INIT .section ".text.init","ax"
+#define __FINIT .previous
+#define __INITDATA .section ".data.init","aw"
+
+/**
+ * module_init() - driver initialization entry point
+ * @x: function to be run at kernel boot time or module insertion
+ *
+ * module_init() will add the driver initialization routine in
+ * the "__initcall.int" code segment if the driver is checked as
+ * "y" or static, or else it will wrap the driver initialization
+ * routine with init_module() which is used by insmod and
+ * modprobe when the driver is used as a module.
+ */
+#define module_init(x) __initcall(x);
+
+/**
+ * module_exit() - driver exit entry point
+ * @x: function to be run when driver is removed
+ *
+ * module_exit() will wrap the driver clean-up code
+ * with cleanup_module(), which rmmod invokes when the
+ * driver is built as a module. If the driver is statically
+ * compiled into the kernel, module_exit() has no effect.
+ */
+#define module_exit(x) __exitcall(x);
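+
+/*
+ * Typical driver boilerplate (illustrative; names are hypothetical):
+ *
+ *	static int __init mydrv_init(void)
+ *	{
+ *		return register_mydrv();
+ *	}
+ *	static void __exit mydrv_exit(void)
+ *	{
+ *		unregister_mydrv();
+ *	}
+ *	module_init(mydrv_init);
+ *	module_exit(mydrv_exit);
+ */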
+
+#else
+
+#define __init
+#define __exit
+#define __initdata
+#define __exitdata
+#define __initcall(fn)
+/* For assembly routines */
+#define __INIT
+#define __FINIT
+#define __INITDATA
+
+/* These macros create a dummy inline: gcc 2.9x does not count alias
+ as usage, hence the `unused function' warning when __init functions
+ are declared static. We use the dummy __*_module_inline functions
+ both to kill the warning and check the type of the init/cleanup
+ function. */
+typedef int (*__init_module_func_t)(void);
+typedef void (*__cleanup_module_func_t)(void);
+#define module_init(x) \
+ int init_module(void) __attribute__((alias(#x))); \
+ static inline __init_module_func_t __init_module_inline(void) \
+ { return x; }
+#define module_exit(x) \
+ void cleanup_module(void) __attribute__((alias(#x))); \
+ static inline __cleanup_module_func_t __cleanup_module_inline(void) \
+ { return x; }
+
+#define __setup(str,func) /* nothing */
+
+#endif
+
+#ifdef CONFIG_HOTPLUG
+#define __devinit
+#define __devinitdata
+#define __devexit
+#define __devexitdata
+#else
+#define __devinit __init
+#define __devinitdata __initdata
+#define __devexit __exit
+#define __devexitdata __exitdata
+#endif
+
+/* Functions marked as __devexit may be discarded at kernel link time, depending
+ on config options. Newer versions of binutils detect references from
+ retained sections to discarded sections and flag an error. Pointers to
+ __devexit functions must use __devexit_p(function_name); the wrapper will
+ insert either the function_name or NULL, depending on the config options.
+ */
+#if defined(MODULE) || defined(CONFIG_HOTPLUG)
+#define __devexit_p(x) x
+#else
+#define __devexit_p(x) NULL
+#endif
+
+#endif /* _LINUX_INIT_H */
diff --git a/xen/include/xeno/interrupt.h b/xen/include/xeno/interrupt.h
new file mode 100644
index 0000000000..488809b99a
--- /dev/null
+++ b/xen/include/xeno/interrupt.h
@@ -0,0 +1,258 @@
+/* interrupt.h */
+#ifndef _LINUX_INTERRUPT_H
+#define _LINUX_INTERRUPT_H
+
+#include <linux/config.h>
+//#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/cache.h>
+
+#include <asm/bitops.h>
+#include <asm/atomic.h>
+#include <asm/ptrace.h>
+
+struct irqaction {
+ void (*handler)(int, void *, struct pt_regs *);
+ unsigned long flags;
+ unsigned long mask;
+ const char *name;
+ void *dev_id;
+ struct irqaction *next;
+};
+
+
+/* Who gets which entry in bh_base. Things which will occur most often
+ should come first */
+
+enum {
+ TIMER_BH = 0,
+ TQUEUE_BH,
+ SCSI_BH,
+ IMMEDIATE_BH
+};
+
+#include <asm/hardirq.h>
+#include <asm/softirq.h>
+
+
+
+/* PLEASE avoid allocating new softirqs unless you _really_ need
+   high-frequency threaded job scheduling. For almost all purposes
+   tasklets are more than enough. E.g. all serial device BHs et
+   al. should be converted to tasklets, not to softirqs.
+ */
+
+enum
+{
+ HI_SOFTIRQ=0,
+ TASKLET_SOFTIRQ
+};
+
+/* softirq mask and active fields moved to irq_cpustat_t in
+ * asm/hardirq.h to get better cache usage. KAO
+ */
+
+struct softirq_action
+{
+ void (*action)(struct softirq_action *);
+ void *data;
+};
+
+asmlinkage void do_softirq(void);
+extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
+extern void softirq_init(void);
+#define __cpu_raise_softirq(cpu, nr) do { softirq_pending(cpu) |= 1UL << (nr); } while (0)
+extern void FASTCALL(cpu_raise_softirq(unsigned int cpu, unsigned int nr));
+extern void FASTCALL(raise_softirq(unsigned int nr));
+
+
+
+/* Tasklets --- multithreaded analogue of BHs.
+
+   The main feature distinguishing them from generic softirqs: a
+   tasklet runs on only one CPU at a time.
+
+   The main feature distinguishing them from BHs: different tasklets
+   may run simultaneously on different CPUs.
+
+   Properties:
+   * If tasklet_schedule() is called, the tasklet is guaranteed
+     to be executed on some cpu at least once afterwards.
+   * If the tasklet is already scheduled but its execution has not yet
+     started, it will be executed only once.
+   * If the tasklet is already running on another CPU (or schedule is
+     called from the tasklet itself), it is rescheduled for later.
+   * A tasklet is strictly serialized with respect to itself, but not
+     with respect to other tasklets. If a client needs intertask
+     synchronization, it must provide it with spinlocks.
+ */
+
+struct tasklet_struct
+{
+ struct tasklet_struct *next;
+ unsigned long state;
+ atomic_t count;
+ void (*func)(unsigned long);
+ unsigned long data;
+};
+
+#define DECLARE_TASKLET(name, func, data) \
+struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data }
+
+#define DECLARE_TASKLET_DISABLED(name, func, data) \
+struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }
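+
+/*
+ * Illustrative pattern (hypothetical names): defer work from an
+ * interrupt handler to a tasklet.
+ *
+ *	static void mydev_do_work(unsigned long data);
+ *	DECLARE_TASKLET(mydev_tasklet, mydev_do_work, 0);
+ *
+ *	static void mydev_interrupt(int irq, void *dev_id,
+ *	                            struct pt_regs *regs)
+ *	{
+ *		tasklet_schedule(&mydev_tasklet);
+ *	}
+ */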
+
+
+enum
+{
+ TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
+ TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
+};
+
+struct tasklet_head
+{
+ struct tasklet_struct *list;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct tasklet_head tasklet_vec[NR_CPUS];
+extern struct tasklet_head tasklet_hi_vec[NR_CPUS];
+
+#ifdef CONFIG_SMP
+static inline int tasklet_trylock(struct tasklet_struct *t)
+{
+ return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
+}
+
+static inline void tasklet_unlock(struct tasklet_struct *t)
+{
+ smp_mb__before_clear_bit();
+ clear_bit(TASKLET_STATE_RUN, &(t)->state);
+}
+
+static inline void tasklet_unlock_wait(struct tasklet_struct *t)
+{
+ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
+}
+#else
+#define tasklet_trylock(t) 1
+#define tasklet_unlock_wait(t) do { } while (0)
+#define tasklet_unlock(t) do { } while (0)
+#endif
+
+extern void FASTCALL(__tasklet_schedule(struct tasklet_struct *t));
+
+static inline void tasklet_schedule(struct tasklet_struct *t)
+{
+ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_schedule(t);
+}
+
+extern void FASTCALL(__tasklet_hi_schedule(struct tasklet_struct *t));
+
+static inline void tasklet_hi_schedule(struct tasklet_struct *t)
+{
+ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_hi_schedule(t);
+}
+
+
+static inline void tasklet_disable_nosync(struct tasklet_struct *t)
+{
+ atomic_inc(&t->count);
+ smp_mb__after_atomic_inc();
+}
+
+static inline void tasklet_disable(struct tasklet_struct *t)
+{
+ tasklet_disable_nosync(t);
+ tasklet_unlock_wait(t);
+ smp_mb();
+}
+
+static inline void tasklet_enable(struct tasklet_struct *t)
+{
+ smp_mb__before_atomic_dec();
+ if (atomic_dec_and_test(&t->count) &&
+ test_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_schedule(t);
+}
+
+static inline void tasklet_hi_enable(struct tasklet_struct *t)
+{
+ smp_mb__before_atomic_dec();
+ if (atomic_dec_and_test(&t->count) &&
+ test_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_hi_schedule(t);
+}
+
+extern void tasklet_kill(struct tasklet_struct *t);
+extern void tasklet_init(struct tasklet_struct *t,
+ void (*func)(unsigned long), unsigned long data);
+
+#ifdef CONFIG_SMP
+
+#define SMP_TIMER_NAME(name) name##__thr
+
+#define SMP_TIMER_DEFINE(name, task) \
+DECLARE_TASKLET(task, name##__thr, 0); \
+static void name (unsigned long dummy) \
+{ \
+ tasklet_schedule(&(task)); \
+}
+
+#else /* CONFIG_SMP */
+
+#define SMP_TIMER_NAME(name) name
+#define SMP_TIMER_DEFINE(name, task)
+
+#endif /* CONFIG_SMP */
+
+
+/* Old BH definitions */
+
+extern struct tasklet_struct bh_task_vec[];
+
+/* It is exported _ONLY_ for wait_on_irq(). */
+extern spinlock_t global_bh_lock;
+
+static inline void mark_bh(int nr)
+{
+ tasklet_hi_schedule(bh_task_vec+nr);
+}
+
+extern void init_bh(int nr, void (*routine)(void));
+extern void remove_bh(int nr);
+
+
+/*
+ * Autoprobing for irqs:
+ *
+ * probe_irq_on() and probe_irq_off() provide robust primitives
+ * for accurate IRQ probing during kernel initialization. They are
+ * reasonably simple to use, are not "fooled" by spurious interrupts,
+ * and, unlike other attempts at IRQ probing, they do not get hung on
+ * stuck interrupts (such as unused PS2 mouse interfaces on ASUS boards).
+ *
+ * For reasonably foolproof probing, use them as follows:
+ *
+ * 1. clear and/or mask the device's internal interrupt.
+ * 2. sti();
+ * 3. irqs = probe_irq_on(); // "take over" all unassigned idle IRQs
+ * 4. enable the device and cause it to trigger an interrupt.
+ * 5. wait for the device to interrupt, using non-intrusive polling or a delay.
+ * 6. irq = probe_irq_off(irqs); // get IRQ number, 0=none, negative=multiple
+ * 7. service the device to clear its pending interrupt.
+ * 8. loop again if paranoia is required.
+ *
+ * probe_irq_on() returns a mask of allocated irq's.
+ *
+ * probe_irq_off() takes the mask as a parameter,
+ * and returns the irq number which occurred,
+ * or zero if none occurred, or a negative irq number
+ * if more than one irq occurred.
+ */
+extern unsigned long probe_irq_on(void); /* returns 0 on failure */
+extern int probe_irq_off(unsigned long); /* returns 0 or negative on failure */
+extern unsigned int probe_irq_mask(unsigned long); /* returns mask of ISA interrupts */
+
+#endif
diff --git a/xen/include/xeno/ioctl.h b/xen/include/xeno/ioctl.h
new file mode 100644
index 0000000000..aa91eb3951
--- /dev/null
+++ b/xen/include/xeno/ioctl.h
@@ -0,0 +1,7 @@
+#ifndef _LINUX_IOCTL_H
+#define _LINUX_IOCTL_H
+
+#include <asm/ioctl.h>
+
+#endif /* _LINUX_IOCTL_H */
+
diff --git a/xen/include/xeno/ioport.h b/xen/include/xeno/ioport.h
new file mode 100644
index 0000000000..0416edc71e
--- /dev/null
+++ b/xen/include/xeno/ioport.h
@@ -0,0 +1,121 @@
+/*
+ * ioport.h Definitions of routines for detecting, reserving and
+ * allocating system resources.
+ *
+ * Authors: Linus Torvalds
+ */
+
+#ifndef _LINUX_IOPORT_H
+#define _LINUX_IOPORT_H
+
+/*
+ * Resources are tree-like, allowing
+ * nesting etc..
+ */
+struct resource {
+ const char *name;
+ unsigned long start, end;
+ unsigned long flags;
+ struct resource *parent, *sibling, *child;
+};
+
+struct resource_list {
+ struct resource_list *next;
+ struct resource *res;
+ struct pci_dev *dev;
+};
+
+/*
+ * IO resources have these defined flags.
+ */
+#define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */
+
+#define IORESOURCE_IO 0x00000100 /* Resource type */
+#define IORESOURCE_MEM 0x00000200
+#define IORESOURCE_IRQ 0x00000400
+#define IORESOURCE_DMA 0x00000800
+
+#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */
+#define IORESOURCE_READONLY 0x00002000
+#define IORESOURCE_CACHEABLE 0x00004000
+#define IORESOURCE_RANGELENGTH 0x00008000
+#define IORESOURCE_SHADOWABLE 0x00010000
+#define IORESOURCE_BUS_HAS_VGA 0x00080000
+
+#define IORESOURCE_UNSET 0x20000000
+#define IORESOURCE_AUTO 0x40000000
+#define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */
+
+/* ISA PnP IRQ specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_IRQ_HIGHEDGE (1<<0)
+#define IORESOURCE_IRQ_LOWEDGE (1<<1)
+#define IORESOURCE_IRQ_HIGHLEVEL (1<<2)
+#define IORESOURCE_IRQ_LOWLEVEL (1<<3)
+
+/* ISA PnP DMA specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_DMA_TYPE_MASK (3<<0)
+#define IORESOURCE_DMA_8BIT (0<<0)
+#define IORESOURCE_DMA_8AND16BIT (1<<0)
+#define IORESOURCE_DMA_16BIT (2<<0)
+
+#define IORESOURCE_DMA_MASTER (1<<2)
+#define IORESOURCE_DMA_BYTE (1<<3)
+#define IORESOURCE_DMA_WORD (1<<4)
+
+#define IORESOURCE_DMA_SPEED_MASK (3<<6)
+#define IORESOURCE_DMA_COMPATIBLE (0<<6)
+#define IORESOURCE_DMA_TYPEA (1<<6)
+#define IORESOURCE_DMA_TYPEB (2<<6)
+#define IORESOURCE_DMA_TYPEF (3<<6)
+
+/* ISA PnP memory I/O specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_MEM_WRITEABLE (1<<0) /* dup: IORESOURCE_READONLY */
+#define IORESOURCE_MEM_CACHEABLE (1<<1) /* dup: IORESOURCE_CACHEABLE */
+#define IORESOURCE_MEM_RANGELENGTH (1<<2) /* dup: IORESOURCE_RANGELENGTH */
+#define IORESOURCE_MEM_TYPE_MASK (3<<3)
+#define IORESOURCE_MEM_8BIT (0<<3)
+#define IORESOURCE_MEM_16BIT (1<<3)
+#define IORESOURCE_MEM_8AND16BIT (2<<3)
+#define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */
+#define IORESOURCE_MEM_EXPANSIONROM (1<<6)
+
+/* PC/ISA/whatever - the normal PC address spaces: IO and memory */
+extern struct resource ioport_resource;
+extern struct resource iomem_resource;
+
+extern int get_resource_list(struct resource *, char *buf, int size);
+
+extern int check_resource(struct resource *root, unsigned long, unsigned long);
+extern int request_resource(struct resource *root, struct resource *new);
+extern int release_resource(struct resource *new);
+extern int allocate_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align,
+ void (*alignf)(void *, struct resource *,
+ unsigned long, unsigned long),
+ void *alignf_data);
+
+/* Convenience shorthand with allocation */
+#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name))
+#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name))
+
+extern struct resource * __request_region(struct resource *, unsigned long start, unsigned long n, const char *name);
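+
+/*
+ * Canonical driver pattern (illustrative; "mydev" and io_base are
+ * assumed values):
+ *
+ *	if (!request_region(io_base, 8, "mydev"))
+ *		return -EBUSY;
+ *	...
+ *	release_region(io_base, 8);
+ */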
+
+/* Compatibility cruft */
+#define check_region(start,n) __check_region(&ioport_resource, (start), (n))
+#define release_region(start,n) __release_region(&ioport_resource, (start), (n))
+#define check_mem_region(start,n) __check_region(&iomem_resource, (start), (n))
+#define release_mem_region(start,n) __release_region(&iomem_resource, (start), (n))
+
+extern int __check_region(struct resource *, unsigned long, unsigned long);
+extern void __release_region(struct resource *, unsigned long, unsigned long);
+
+#define get_ioport_list(buf) get_resource_list(&ioport_resource, buf, PAGE_SIZE)
+#define get_mem_list(buf) get_resource_list(&iomem_resource, buf, PAGE_SIZE)
+
+#define HAVE_AUTOIRQ
+extern void autoirq_setup(int waittime);
+extern int autoirq_report(int waittime);
+
+#endif /* _LINUX_IOPORT_H */
diff --git a/xen/include/xeno/irq.h b/xen/include/xeno/irq.h
new file mode 100644
index 0000000000..7342491345
--- /dev/null
+++ b/xen/include/xeno/irq.h
@@ -0,0 +1,63 @@
+#ifndef __irq_h
+#define __irq_h
+
+#include <xeno/config.h>
+#include <xeno/spinlock.h>
+#include <asm/ptrace.h>
+
+/*
+ * IRQ line status.
+ */
+#define IRQ_INPROGRESS 1 /* IRQ handler active - do not enter! */
+#define IRQ_DISABLED 2 /* IRQ disabled - do not enter! */
+#define IRQ_PENDING 4 /* IRQ pending - replay on enable */
+#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */
+#define IRQ_AUTODETECT 16 /* IRQ is being autodetected */
+#define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */
+#define IRQ_LEVEL 64 /* IRQ level triggered */
+#define IRQ_MASKED 128 /* IRQ masked - shouldn't be seen again */
+#define IRQ_PER_CPU 256 /* IRQ is per CPU */
+
+/*
+ * Interrupt controller descriptor. This is all we need
+ * to describe about the low-level hardware.
+ */
+struct hw_interrupt_type {
+ const char * typename;
+ unsigned int (*startup)(unsigned int irq);
+ void (*shutdown)(unsigned int irq);
+ void (*enable)(unsigned int irq);
+ void (*disable)(unsigned int irq);
+ void (*ack)(unsigned int irq);
+ void (*end)(unsigned int irq);
+ void (*set_affinity)(unsigned int irq, unsigned long mask);
+};
+
+typedef struct hw_interrupt_type hw_irq_controller;
+
+#include <asm/irq.h>
+
+/*
+ * This is the "IRQ descriptor", which contains various information
+ * about the irq, including what kind of hardware handling it has,
+ * whether it is disabled etc etc.
+ *
+ * Pad this out to 32 bytes for cache and indexing reasons.
+ */
+typedef struct {
+ unsigned int status; /* IRQ status */
+ hw_irq_controller *handler;
+ struct irqaction *action; /* IRQ action list */
+ unsigned int depth; /* nested irq disables */
+ spinlock_t lock;
+} ____cacheline_aligned irq_desc_t;
+
+extern irq_desc_t irq_desc [NR_IRQS];
+
+extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+extern int setup_irq(unsigned int , struct irqaction * );
+
+extern hw_irq_controller no_irq_type; /* needed in every arch ? */
+extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
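+
+/*
+ * Illustrative registration of a static handler (names hypothetical;
+ * fields follow struct irqaction order: handler, flags, mask, name,
+ * dev_id, next):
+ *
+ *	static struct irqaction myaction = {
+ *		my_handler, SA_INTERRUPT, 0, "mydev", NULL, NULL
+ *	};
+ *	setup_irq(MYDEV_IRQ, &myaction);
+ */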
+
+#endif /* __irq_h */
diff --git a/xen/include/xeno/irq_cpustat.h b/xen/include/xeno/irq_cpustat.h
new file mode 100644
index 0000000000..646655403a
--- /dev/null
+++ b/xen/include/xeno/irq_cpustat.h
@@ -0,0 +1,34 @@
+#ifndef __irq_cpustat_h
+#define __irq_cpustat_h
+
+/*
+ * Contains default mappings for irq_cpustat_t, used by almost every
+ * architecture. Some archs (like s390) have per-cpu hardware pages and
+ * they define their own mappings for irq_stat.
+ *
+ * Keith Owens <kaos@ocs.com.au> July 2000.
+ */
+
+#include <xeno/config.h>
+
+/*
+ * Simple wrappers reducing source bloat. Define all irq_stat fields
+ * here, even ones that are arch dependent. That way we get common
+ * definitions instead of differing sets for each arch.
+ */
+
+extern irq_cpustat_t irq_stat[]; /* defined in asm/hardirq.h */
+
+#ifdef CONFIG_SMP
+#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
+#else
+#define __IRQ_STAT(cpu, member) ((void)(cpu), irq_stat[0].member)
+#endif
+
+ /* arch independent irq_stat fields */
+#define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending)
+#define local_irq_count(cpu) __IRQ_STAT((cpu), __local_irq_count)
+#define local_bh_count(cpu) __IRQ_STAT((cpu), __local_bh_count)
+#define syscall_count(cpu) __IRQ_STAT((cpu), __syscall_count)
+
+#endif /* __irq_cpustat_h */
diff --git a/xen/include/xeno/kdev_t.h b/xen/include/xeno/kdev_t.h
new file mode 100644
index 0000000000..9d85cba3e5
--- /dev/null
+++ b/xen/include/xeno/kdev_t.h
@@ -0,0 +1,123 @@
+#ifndef _LINUX_KDEV_T_H
+#define _LINUX_KDEV_T_H
+#if defined(__KERNEL__) || defined(_LVM_H_INCLUDE)
+/*
+As a preparation for the introduction of larger device numbers,
+we introduce a type kdev_t to hold them. No information about
+this type is known outside of this include file.
+
+Objects of type kdev_t designate a device. Outside of the kernel
+the corresponding things are objects of type dev_t - usually an
+integral type with the device major and minor in the high and low
+bits, respectively. Conversion is done by
+
+extern kdev_t to_kdev_t(int);
+
+It is up to the various file systems to decide how objects of type
+dev_t are stored on disk.
+The only other point of contact between kernel and outside world
+are the system calls stat and mknod, new versions of which will
+eventually have to be used in libc.
+
+[Unfortunately, the floppy control ioctls fail to hide the internal
+kernel structures, and the fd_device field of a struct floppy_drive_struct
+is user-visible. So, it remains a dev_t for the moment, with some ugly
+conversions in floppy.c.]
+
+Inside the kernel, we aim for a kdev_t type that is a pointer
+to a structure with information about the device (like major,
+minor, size, blocksize, sectorsize, name, read-only flag,
+struct file_operations etc.).
+
+However, for the time being we let kdev_t be almost the same as dev_t:
+
+typedef struct { unsigned short major, minor; } kdev_t;
+
+Admissible operations on an object of type kdev_t:
+- passing it along
+- comparing it for equality with another such object
+- storing it in ROOT_DEV, inode->i_dev, inode->i_rdev, sb->s_dev,
+ bh->b_dev, req->rq_dev, de->dc_dev, tty->device
+- using its bit pattern as argument in a hash function
+- finding its major and minor
+- complaining about it
+
+An object of type kdev_t is created only by the function MKDEV(),
+with the single exception of the constant 0 (no device).
+
+Right now the other information mentioned above is usually found
+in static arrays indexed by major or major,minor.
+
+An obstacle to immediately using
+ typedef struct { ... (* lots of information *) } *kdev_t
+is the case of mknod used to create a block device that the
+kernel doesn't know about at present (but first learns about
+when some module is inserted).
+
+aeb - 950811
+*/
+
+/* Since MINOR(dev) is used as index in static arrays,
+ the kernel is not quite ready yet for larger minors.
+ However, everything runs fine with an arbitrary kdev_t type. */
+
+#define MINORBITS 8
+#define MINORMASK ((1U << MINORBITS) - 1)
+
+typedef unsigned short kdev_t;
+
+#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+#define HASHDEV(dev) ((unsigned int) (dev))
+#define NODEV 0
+#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
+#define B_FREE 0xffff /* yuk */
+
+extern const char * kdevname(kdev_t); /* note: returns pointer to static data! */
+
+/* 2.5.x compatibility */
+#define mk_kdev(a,b) MKDEV(a,b)
+#define major(d) MAJOR(d)
+#define minor(d) MINOR(d)
+#define kdev_same(a,b) ((a) == (b))
+#define kdev_none(d) (!(d))
+#define kdev_val(d) ((unsigned int)(d))
+#define val_to_kdev(d) ((kdev_t)(d))
+
+/*
+As long as device numbers in the outside world have 16 bits only,
+we use these conversions.
+*/
+
+static inline unsigned int kdev_t_to_nr(kdev_t dev) {
+ return (MAJOR(dev)<<8) | MINOR(dev);
+}
+
+static inline kdev_t to_kdev_t(int dev)
+{
+ int major, minor;
+#if 0
+ major = (dev >> 16);
+ if (!major) {
+ major = (dev >> 8);
+ minor = (dev & 0xff);
+ } else
+ minor = (dev & 0xffff);
+#else
+ major = (dev >> 8);
+ minor = (dev & 0xff);
+#endif
+ return MKDEV(major, minor);
+}
+
+#else /* __KERNEL__ || _LVM_H_INCLUDE */
+
+/*
+Some programs want their definitions of MAJOR and MINOR and MKDEV
+from the kernel sources. These must be the externally visible ones.
+*/
+#define MAJOR(dev) ((dev)>>8)
+#define MINOR(dev) ((dev) & 0xff)
+#define MKDEV(ma,mi) ((ma)<<8 | (mi))
+#endif /* __KERNEL__ || _LVM_H_INCLUDE */
+#endif
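
For illustration (not from the patch), the round trip between the external
16-bit dev_t form and kdev_t while both remain 8+8 bits:

    #include <xeno/kdev_t.h>

    static void kdev_demo(void)
    {
        kdev_t dev = MKDEV(3, 1);            /* IDE0 disk, partition 1 */
        unsigned int ma = MAJOR(dev);        /* 3 */
        unsigned int mi = MINOR(dev);        /* 1 */
        unsigned int nr = kdev_t_to_nr(dev); /* 0x0301, the external form */
        (void)ma; (void)mi; (void)nr;        /* silence unused warnings */
    }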
diff --git a/xen/include/xeno/kernel.h b/xen/include/xeno/kernel.h
new file mode 100644
index 0000000000..993a6c19cf
--- /dev/null
+++ b/xen/include/xeno/kernel.h
@@ -0,0 +1,37 @@
+#ifndef _LINUX_KERNEL_H
+#define _LINUX_KERNEL_H
+
+/*
+ * 'kernel.h' contains some often-used function prototypes etc
+ */
+
+/*
+ * min()/max() macros that also do
+ * strict type-checking.. See the
+ * "unnecessary" pointer comparison.
+ */
+#define min(x,y) ({ \
+ const typeof(x) _x = (x); \
+ const typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x < _y ? _x : _y; })
+
+#define max(x,y) ({ \
+ const typeof(x) _x = (x); \
+ const typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x > _y ? _x : _y; })
+
+/*
+ * ...and if you can't take the strict
+ * types, you can specify one yourself.
+ *
+ * Or not use min/max at all, of course.
+ */
+#define min_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#define max_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+
+#endif /* _LINUX_KERNEL_H */
+
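
The pointer comparison inside min()/max() is what enforces the type check:
comparing pointers to two distinct types draws a compiler warning. A sketch
(not from the patch) of the warning and the min_t() escape hatch:

    #include <xeno/kernel.h>

    static unsigned long clamp_demo(int i, unsigned long l)
    {
        /* min(i, l) would warn here: (void)(&_x == &_y) compares an
           int * with an unsigned long *, i.e. distinct pointer types. */
        return min_t(unsigned long, i, l); /* explicit type: no warning */
    }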
diff --git a/xen/include/xeno/keyhandler.h b/xen/include/xeno/keyhandler.h
new file mode 100644
index 0000000000..d03e09aa47
--- /dev/null
+++ b/xen/include/xeno/keyhandler.h
@@ -0,0 +1,16 @@
+/*
+** We keep an array of 'handlers' for each key code between 0 and 255;
+** this is intended to allow very simple debugging routines (toggle
+** debug flag, dump registers, reboot, etc) to be hooked in rather
+** more cleanly than by editing the serial/keyboard drivers directly.
+*/
+#include <xeno/sched.h>
+
+typedef void key_handler(unsigned char key, void *dev_id,
+ struct pt_regs *regs);
+
+extern void add_key_handler(unsigned char key,
+ key_handler *handler, char *desc);
+
+extern key_handler *get_key_handler(unsigned char key);
+
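
A hypothetical registration, for illustration only (the handler and function
names are invented; printk is assumed to come from xeno/lib.h):

    #include <xeno/keyhandler.h>
    #include <xeno/lib.h>

    static void dump_stats(unsigned char key, void *dev_id,
                           struct pt_regs *regs)
    {
        printk("'%c' pressed -- dumping stats\n", key);
    }

    static void register_debug_keys(void)
    {
        add_key_handler('s', dump_stats, "dump stats");
    }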
diff --git a/xen/include/xeno/lib.h b/xen/include/xeno/lib.h
new file mode 100644
index 0000000000..cd40d119f0
--- /dev/null
+++ b/xen/include/xeno/lib.h
@@ -0,0 +1,51 @@
+#ifndef __LIB_H__
+#define __LIB_H__
+
+#include <stdarg.h>
+#include <xeno/types.h>
+
+#ifndef NDEBUG
+#define ASSERT(_p) do { if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p, __LINE__, __FILE__); *(int*)0 = 0; } } while ( 0 )
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define reserve_bootmem(_p,_l) \
+printk("Memory Reservation 0x%lx, %lu bytes\n", (_p), (_l))
+
+/* lib.c */
+int memcmp(const void * cs,const void * ct,size_t count);
+void * memcpy(void * dest,const void *src,size_t count);
+int strncmp(const char * cs,const char * ct,size_t count);
+int strcmp(const char * cs,const char * ct);
+char * strcpy(char * dest,const char *src);
+char * strncpy(char * dest,const char *src,size_t count);
+void * memset(void * s,int c,size_t count);
+size_t strnlen(const char * s, size_t count);
+size_t strlen(const char * s);
+char * strchr(const char *,int);
+char * strstr(const char * s1,const char * s2);
+unsigned long str_to_quad(unsigned char *s);
+unsigned char *quad_to_str(unsigned long q, unsigned char *s);
+
+/* kernel.c */
+#define printk printf
+void printf (const char *format, ...);
+void cls(void);
+void panic(const char *format, ...);
+
+/* vsprintf.c */
+extern int sprintf(char * buf, const char * fmt, ...)
+ __attribute__ ((format (printf, 2, 3)));
+extern int vsprintf(char *buf, const char *, va_list);
+extern int snprintf(char * buf, size_t size, const char * fmt, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
+extern int sscanf(const char *, const char *, ...)
+ __attribute__ ((format (scanf,2,3)));
+extern int vsscanf(const char *, const char *, va_list);
+long simple_strtol(const char *cp,char **endp,unsigned int base);
+long long simple_strtoll(const char *cp,char **endp,unsigned int base);
+
+#endif /* __LIB_H__ */
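
ASSERT() compiles away when NDEBUG is defined; otherwise a failed predicate
prints the diagnostics and halts via the null write. A small sketch
(illustrative, not from the patch):

    #include <xeno/lib.h>

    static int checked_div(int a, int b)
    {
        ASSERT(b != 0); /* debug build: print and crash if b == 0 */
        return a / b;
    }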
diff --git a/xen/include/xeno/list.h b/xen/include/xeno/list.h
new file mode 100644
index 0000000000..4124a9a037
--- /dev/null
+++ b/xen/include/xeno/list.h
@@ -0,0 +1,160 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_add(struct list_head * new,
+ struct list_head * prev,
+ struct list_head * next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static __inline__ void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_del(struct list_head * prev,
+ struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this; the
+ * entry is left in an undefined state.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static __inline__ void list_del_init(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static __inline__ int list_empty(struct list_head *head)
+{
+ return head->next == head;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static __inline__ void list_splice(struct list_head *list, struct list_head *head)
+{
+ struct list_head *first = list->next;
+
+ if (first != list) {
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+ }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+#endif
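
The intended usage pattern, for illustration (struct frob and the function
names are invented): embed a struct list_head in the payload, then recover
the payload from a node with list_entry().

    #include <xeno/list.h>

    struct frob {
        int value;
        struct list_head link;   /* embedded list node */
    };

    static LIST_HEAD(frob_list);

    static void add_frob(struct frob *f)
    {
        list_add_tail(&f->link, &frob_list);   /* FIFO order */
    }

    static int sum_frobs(void)
    {
        struct list_head *pos;
        int sum = 0;
        list_for_each(pos, &frob_list) {
            struct frob *f = list_entry(pos, struct frob, link);
            sum += f->value;
        }
        return sum;
    }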
diff --git a/xen/include/xeno/major.h b/xen/include/xeno/major.h
new file mode 100644
index 0000000000..b30f88baf8
--- /dev/null
+++ b/xen/include/xeno/major.h
@@ -0,0 +1,199 @@
+#ifndef _LINUX_MAJOR_H
+#define _LINUX_MAJOR_H
+
+#include <xeno/kdev_t.h>
+
+/*
+ * This file has definitions for major device numbers.
+ * For the device number assignments, see Documentation/devices.txt.
+ */
+
+/* limits */
+
+/*
+ * Important: Don't change this to 256. Major number 255 is and must be
+ * reserved for future expansion into a larger dev_t space.
+ */
+#define MAX_CHRDEV 255
+#define MAX_BLKDEV 255
+
+#define UNNAMED_MAJOR 0
+#define MEM_MAJOR 1
+#define RAMDISK_MAJOR 1
+#define FLOPPY_MAJOR 2
+#define PTY_MASTER_MAJOR 2
+#define IDE0_MAJOR 3
+#define PTY_SLAVE_MAJOR 3
+#define HD_MAJOR IDE0_MAJOR
+#define TTY_MAJOR 4
+#define TTYAUX_MAJOR 5
+#define LP_MAJOR 6
+#define VCS_MAJOR 7
+#define LOOP_MAJOR 7
+#define SCSI_DISK0_MAJOR 8
+#define SCSI_TAPE_MAJOR 9
+#define MD_MAJOR 9
+#define MISC_MAJOR 10
+#define SCSI_CDROM_MAJOR 11
+#define QIC02_TAPE_MAJOR 12
+#define XT_DISK_MAJOR 13
+#define SOUND_MAJOR 14
+#define CDU31A_CDROM_MAJOR 15
+#define JOYSTICK_MAJOR 15
+#define GOLDSTAR_CDROM_MAJOR 16
+#define OPTICS_CDROM_MAJOR 17
+#define SANYO_CDROM_MAJOR 18
+#define CYCLADES_MAJOR 19
+#define CYCLADESAUX_MAJOR 20
+#define MITSUMI_X_CDROM_MAJOR 20
+#define MFM_ACORN_MAJOR 21 /* ARM Linux /dev/mfm */
+#define SCSI_GENERIC_MAJOR 21
+#define Z8530_MAJOR 34
+#define DIGI_MAJOR 23
+#define IDE1_MAJOR 22
+#define DIGICU_MAJOR 22
+#define MITSUMI_CDROM_MAJOR 23
+#define CDU535_CDROM_MAJOR 24
+#define STL_SERIALMAJOR 24
+#define MATSUSHITA_CDROM_MAJOR 25
+#define STL_CALLOUTMAJOR 25
+#define MATSUSHITA_CDROM2_MAJOR 26
+#define QIC117_TAPE_MAJOR 27
+#define MATSUSHITA_CDROM3_MAJOR 27
+#define MATSUSHITA_CDROM4_MAJOR 28
+#define STL_SIOMEMMAJOR 28
+#define ACSI_MAJOR 28
+#define AZTECH_CDROM_MAJOR 29
+#define GRAPHDEV_MAJOR 29 /* SparcLinux & Linux/68k /dev/fb */
+#define SHMIQ_MAJOR 85 /* Linux/mips, SGI /dev/shmiq */
+#define CM206_CDROM_MAJOR 32
+#define IDE2_MAJOR 33
+#define IDE3_MAJOR 34
+#define XPRAM_MAJOR 35 /* expanded storage on S/390 = "slow ram" */
+ /* proposed by Peter */
+#define NETLINK_MAJOR 36
+#define PS2ESDI_MAJOR 36
+#define IDETAPE_MAJOR 37
+#define Z2RAM_MAJOR 37
+#define APBLOCK_MAJOR 38 /* AP1000 Block device */
+#define DDV_MAJOR 39 /* AP1000 DDV block device */
+#define NBD_MAJOR 43 /* Network block device */
+#define RISCOM8_NORMAL_MAJOR 48
+#define DAC960_MAJOR 48 /* 48..55 */
+#define RISCOM8_CALLOUT_MAJOR 49
+#define MKISS_MAJOR 55
+#define DSP56K_MAJOR 55 /* DSP56001 processor device */
+
+#define IDE4_MAJOR 56
+#define IDE5_MAJOR 57
+
+#define LVM_BLK_MAJOR 58 /* Logical Volume Manager */
+
+#define SCSI_DISK1_MAJOR 65
+#define SCSI_DISK2_MAJOR 66
+#define SCSI_DISK3_MAJOR 67
+#define SCSI_DISK4_MAJOR 68
+#define SCSI_DISK5_MAJOR 69
+#define SCSI_DISK6_MAJOR 70
+#define SCSI_DISK7_MAJOR 71
+
+
+#define COMPAQ_SMART2_MAJOR 72
+#define COMPAQ_SMART2_MAJOR1 73
+#define COMPAQ_SMART2_MAJOR2 74
+#define COMPAQ_SMART2_MAJOR3 75
+#define COMPAQ_SMART2_MAJOR4 76
+#define COMPAQ_SMART2_MAJOR5 77
+#define COMPAQ_SMART2_MAJOR6 78
+#define COMPAQ_SMART2_MAJOR7 79
+
+#define SPECIALIX_NORMAL_MAJOR 75
+#define SPECIALIX_CALLOUT_MAJOR 76
+
+#define COMPAQ_CISS_MAJOR 104
+#define COMPAQ_CISS_MAJOR1 105
+#define COMPAQ_CISS_MAJOR2 106
+#define COMPAQ_CISS_MAJOR3 107
+#define COMPAQ_CISS_MAJOR4 108
+#define COMPAQ_CISS_MAJOR5 109
+#define COMPAQ_CISS_MAJOR6 110
+#define COMPAQ_CISS_MAJOR7 111
+
+#define ATARAID_MAJOR 114
+
+#define DASD_MAJOR 94 /* Official assignations from Peter */
+
+#define MDISK_MAJOR 95 /* Official assignations from Peter */
+
+#define I2O_MAJOR 80 /* 80->87 */
+
+#define IDE6_MAJOR 88
+#define IDE7_MAJOR 89
+#define IDE8_MAJOR 90
+#define IDE9_MAJOR 91
+
+#define UBD_MAJOR 98
+
+#define AURORA_MAJOR 79
+
+#define JSFD_MAJOR 99
+
+#define PHONE_MAJOR 100
+
+#define LVM_CHAR_MAJOR 109 /* Logical Volume Manager */
+
+#define UMEM_MAJOR 116 /* http://www.umem.com/ Battery Backed RAM */
+
+#define RTF_MAJOR 150
+#define RAW_MAJOR 162
+
+#define USB_ACM_MAJOR 166
+#define USB_ACM_AUX_MAJOR 167
+#define USB_CHAR_MAJOR 180
+
+#define UNIX98_PTY_MASTER_MAJOR 128
+#define UNIX98_PTY_MAJOR_COUNT 8
+#define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT)
+
+#define VXVM_MAJOR 199 /* VERITAS volume i/o driver */
+#define VXSPEC_MAJOR 200 /* VERITAS volume config driver */
+#define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */
+
+#define MSR_MAJOR 202
+#define CPUID_MAJOR 203
+
+#define OSST_MAJOR 206 /* OnStream-SCx0 SCSI tape */
+
+#define IBM_TTY3270_MAJOR 227 /* Official allocations now */
+#define IBM_FS3270_MAJOR 228
+
+/*
+ * Tests for SCSI devices.
+ */
+
+#define SCSI_DISK_MAJOR(M) ((M) == SCSI_DISK0_MAJOR || \
+ ((M) >= SCSI_DISK1_MAJOR && (M) <= SCSI_DISK7_MAJOR))
+
+#define SCSI_BLK_MAJOR(M) \
+ (SCSI_DISK_MAJOR(M) \
+ || (M) == SCSI_CDROM_MAJOR)
+
+static __inline__ int scsi_blk_major(int m) {
+ return SCSI_BLK_MAJOR(m);
+}
+
+/*
+ * Tests for IDE devices
+ */
+#define IDE_DISK_MAJOR(M) ((M) == IDE0_MAJOR || (M) == IDE1_MAJOR || \
+ (M) == IDE2_MAJOR || (M) == IDE3_MAJOR || \
+ (M) == IDE4_MAJOR || (M) == IDE5_MAJOR || \
+ (M) == IDE6_MAJOR || (M) == IDE7_MAJOR || \
+ (M) == IDE8_MAJOR || (M) == IDE9_MAJOR)
+
+static __inline__ int ide_blk_major(int m)
+{
+ return IDE_DISK_MAJOR(m);
+}
+
+#endif
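
A sketch of how the classification helpers at the end combine (illustrative
only; the function is invented):

    #include <xeno/major.h>

    static int is_disk_major(kdev_t dev)
    {
        int ma = MAJOR(dev);
        return ide_blk_major(ma) || scsi_blk_major(ma);
    }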
diff --git a/xen/include/xeno/mii.h b/xen/include/xeno/mii.h
new file mode 100644
index 0000000000..943913583d
--- /dev/null
+++ b/xen/include/xeno/mii.h
@@ -0,0 +1,165 @@
+/*
+ * linux/mii.h: definitions for MII-compatible transceivers
+ * Originally drivers/net/sunhme.h.
+ *
+ * Copyright (C) 1996, 1999, 2001 David S. Miller (davem@redhat.com)
+ */
+
+#ifndef __LINUX_MII_H__
+#define __LINUX_MII_H__
+
+#include <linux/types.h>
+
+/* Generic MII registers. */
+
+#define MII_BMCR 0x00 /* Basic mode control register */
+#define MII_BMSR 0x01 /* Basic mode status register */
+#define MII_PHYSID1 0x02 /* PHYS ID 1 */
+#define MII_PHYSID2 0x03 /* PHYS ID 2 */
+#define MII_ADVERTISE 0x04 /* Advertisement control reg */
+#define MII_LPA 0x05 /* Link partner ability reg */
+#define MII_EXPANSION 0x06 /* Expansion register */
+#define MII_DCOUNTER 0x12 /* Disconnect counter */
+#define MII_FCSCOUNTER 0x13 /* False carrier counter */
+#define MII_NWAYTEST 0x14 /* N-way auto-neg test reg */
+#define MII_RERRCOUNTER 0x15 /* Receive error counter */
+#define MII_SREVISION 0x16 /* Silicon revision */
+#define MII_RESV1 0x17 /* Reserved... */
+#define MII_LBRERROR 0x18 /* Lpback, rx, bypass error */
+#define MII_PHYADDR 0x19 /* PHY address */
+#define MII_RESV2 0x1a /* Reserved... */
+#define MII_TPISTATUS 0x1b /* TPI status for 10mbps */
+#define MII_NCONFIG 0x1c /* Network interface config */
+
+/* Basic mode control register. */
+#define BMCR_RESV 0x007f /* Unused... */
+#define BMCR_CTST 0x0080 /* Collision test */
+#define BMCR_FULLDPLX 0x0100 /* Full duplex */
+#define BMCR_ANRESTART 0x0200 /* Auto negotiation restart */
+#define BMCR_ISOLATE 0x0400 /* Disconnect DP83840 from MII */
+#define BMCR_PDOWN 0x0800 /* Powerdown the DP83840 */
+#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */
+#define BMCR_SPEED100 0x2000 /* Select 100Mbps */
+#define BMCR_LOOPBACK 0x4000 /* TXD loopback bits */
+#define BMCR_RESET 0x8000 /* Reset the DP83840 */
+
+/* Basic mode status register. */
+#define BMSR_ERCAP 0x0001 /* Ext-reg capability */
+#define BMSR_JCD 0x0002 /* Jabber detected */
+#define BMSR_LSTATUS 0x0004 /* Link status */
+#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */
+#define BMSR_RFAULT 0x0010 /* Remote fault detected */
+#define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete */
+#define BMSR_RESV 0x07c0 /* Unused... */
+#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */
+#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */
+#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */
+#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */
+#define BMSR_100BASE4 0x8000 /* Can do 100mbps, 4k packets */
+
+/* Advertisement control register. */
+#define ADVERTISE_SLCT 0x001f /* Selector bits */
+#define ADVERTISE_CSMA 0x0001 /* Only selector supported */
+#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */
+#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */
+#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */
+#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */
+#define ADVERTISE_100BASE4 0x0200 /* Try for 100mbps 4k packets */
+#define ADVERTISE_RESV 0x1c00 /* Unused... */
+#define ADVERTISE_RFAULT 0x2000 /* Say we can detect faults */
+#define ADVERTISE_LPACK 0x4000 /* Ack link partner's response */
+#define ADVERTISE_NPAGE 0x8000 /* Next page bit */
+
+#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \
+ ADVERTISE_100HALF | ADVERTISE_100FULL)
+
+/* Link partner ability register. */
+#define LPA_SLCT 0x001f /* Same as advertise selector */
+#define LPA_10HALF 0x0020 /* Can do 10mbps half-duplex */
+#define LPA_10FULL 0x0040 /* Can do 10mbps full-duplex */
+#define LPA_100HALF 0x0080 /* Can do 100mbps half-duplex */
+#define LPA_100FULL 0x0100 /* Can do 100mbps full-duplex */
+#define LPA_100BASE4 0x0200 /* Can do 100mbps 4k packets */
+#define LPA_RESV 0x1c00 /* Unused... */
+#define LPA_RFAULT 0x2000 /* Link partner faulted */
+#define LPA_LPACK 0x4000 /* Link partner acked us */
+#define LPA_NPAGE 0x8000 /* Next page bit */
+
+#define LPA_DUPLEX (LPA_10FULL | LPA_100FULL)
+#define LPA_100 (LPA_100FULL | LPA_100HALF | LPA_100BASE4)
+
+/* Expansion register for auto-negotiation. */
+#define EXPANSION_NWAY 0x0001 /* Can do N-way auto-nego */
+#define EXPANSION_LCWP 0x0002 /* Got new RX page code word */
+#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */
+#define EXPANSION_NPCAPABLE 0x0008 /* Link partner supports npage */
+#define EXPANSION_MFAULTS 0x0010 /* Multiple faults detected */
+#define EXPANSION_RESV 0xffe0 /* Unused... */
+
+/* N-way test register. */
+#define NWAYTEST_RESV1 0x00ff /* Unused... */
+#define NWAYTEST_LOOPBACK 0x0100 /* Enable loopback for N-way */
+#define NWAYTEST_RESV2 0xfe00 /* Unused... */
+
+/* This structure is used in all SIOCxMIIxxx ioctl calls */
+struct mii_ioctl_data {
+ u16 phy_id;
+ u16 reg_num;
+ u16 val_in;
+ u16 val_out;
+};
+
+
+/**
+ * mii_nway_result
+ * @negotiated: value of MII ANAR and'd with ANLPAR
+ *
+ * Given a set of MII abilities, this checks each bit and returns the
+ * currently supported media, in the priority order defined by
+ * IEEE 802.3u. We use LPA_xxx constants but note this is not the
+ * value of LPA solely, as described above.
+ *
+ * The one exception to IEEE 802.3u is that 100baseT4 is placed
+ * between 100T-full and 100T-half. If your phy does not support
+ * 100T4 this is fine. If your phy places 100T4 elsewhere in the
+ * priority order, you will need to roll your own function.
+ */
+static inline unsigned int mii_nway_result (unsigned int negotiated)
+{
+ unsigned int ret;
+
+ if (negotiated & LPA_100FULL)
+ ret = LPA_100FULL;
+ else if (negotiated & LPA_100BASE4)
+ ret = LPA_100BASE4;
+ else if (negotiated & LPA_100HALF)
+ ret = LPA_100HALF;
+ else if (negotiated & LPA_10FULL)
+ ret = LPA_10FULL;
+ else
+ ret = LPA_10HALF;
+
+ return ret;
+}
+
+/**
+ * mii_duplex
+ * @duplex_lock: Non-zero if duplex is locked at full
+ * @negotiated: value of MII ANAR and'd with ANLPAR
+ *
+ * A small helper function for a common case. Returns one
+ * if the media is operating or locked at full duplex, and
+ * returns zero otherwise.
+ */
+static inline unsigned int mii_duplex (unsigned int duplex_lock,
+ unsigned int negotiated)
+{
+ if (duplex_lock)
+ return 1;
+ if (mii_nway_result(negotiated) & LPA_DUPLEX)
+ return 1;
+ return 0;
+}
+
+
+#endif /* __LINUX_MII_H__ */
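
The usual negotiation readout, sketched under the assumption of a
driver-supplied register accessor (mdio_read() below is hypothetical and not
part of this header):

    #include <xeno/mii.h>

    extern int mdio_read(int phy_id, int reg); /* hypothetical accessor */

    static unsigned int phy_best_mode(int phy_id)
    {
        unsigned int advertise  = mdio_read(phy_id, MII_ADVERTISE);
        unsigned int lpa        = mdio_read(phy_id, MII_LPA);
        unsigned int negotiated = advertise & lpa;   /* ANAR & ANLPAR */
        return mii_nway_result(negotiated);          /* e.g. LPA_100FULL */
    }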
diff --git a/xen/include/xeno/mm.h b/xen/include/xeno/mm.h
new file mode 100644
index 0000000000..6605f1ed58
--- /dev/null
+++ b/xen/include/xeno/mm.h
@@ -0,0 +1,142 @@
+
+#ifndef __XENO_MM_H__
+#define __XENO_MM_H__
+
+#include <xeno/config.h>
+#include <asm/atomic.h>
+#include <asm/desc.h>
+#include <xeno/list.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <xeno/spinlock.h>
+
+/* XXX KAF: These may die eventually, but so many refs in slab.c :((( */
+
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */
+#define __GFP_DMA 0x01
+
+/* Action modifiers - doesn't change the zoning */
+#define __GFP_WAIT 0x10 /* Can wait and reschedule? */
+#define __GFP_HIGH 0x20 /* Should access emergency pools? */
+#define __GFP_IO 0x40 /* Can start low memory physical IO? */
+#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */
+#define __GFP_FS 0x100 /* Can call down to low-level FS? */
+
+#define GFP_ATOMIC (__GFP_HIGH)
+#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+
+/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
+ platforms, used as appropriate on others */
+
+#define GFP_DMA __GFP_DMA
+
+
+/******************************************************************************
+ * The following is for page_alloc.c.
+ */
+
+void init_page_allocator(unsigned long min, unsigned long max);
+unsigned long __get_free_pages(int mask, int order);
+void __free_pages(unsigned long p, int order);
+#define get_free_page(_m) (__get_free_pages((_m),0))
+#define __get_free_page(_m) (__get_free_pages((_m),0))
+#define free_pages(_p,_o) (__free_pages(_p,_o))
+#define free_page(_p) (__free_pages(_p,0))
+
+
+/******************************************************************************
+ * The following is the array of page info. One entry per page owned
+ * by the hypervisor, indexed from `mem_map', just like Linux.
+ *
+ * 12.11.02. We no longer use struct page or mem_map; these are replaced
+ * by struct pfn_info and frame_table respectively. Boris Dragovic
+ */
+
+/*
+ * This is still fatter than I'd like. Do we need the count?
+ * Do we need the flags? The list at least seems req'd by slab.c.
+ */
+typedef struct pfn_info {
+ struct list_head list; /* ->mapping has some page lists. */
+ unsigned long flags; /* atomic flags. */
+ unsigned long tot_count; /* Total domain usage count. */
+ unsigned long type_count; /* pagetable/dir, or domain-writeable refs. */
+} frame_table_t;
+
+/*
+ * We use a high bit to indicate that a page is pinned.
+ * We do not use the top bit as that would mean that we'd get confused with
+ * -ve error numbers in some places in common/memory.c.
+ */
+#define REFCNT_PIN_BIT 0x40000000UL
+
+#define get_page_tot(p) ((p)->tot_count++)
+#define put_page_tot(p) (--(p)->tot_count)
+#define page_tot_count(p) ((p)->tot_count)
+#define set_page_tot_count(p,v) ((p)->tot_count = v)
+
+#define get_page_type(p) ((p)->type_count++)
+#define put_page_type(p) (--(p)->type_count)
+#define page_type_count(p) ((p)->type_count)
+#define set_page_type_count(p,v) ((p)->type_count = v)
+
+#define PG_domain_mask 0x00ffffff /* owning domain (24 bits) */
+/* hypervisor flags (domain == 0) */
+#define PG_slab 24
+/* domain flags (domain != 0) */
+/*
+ * NB. The following three flags are MUTUALLY EXCLUSIVE!
+ * At most one can be true at any point, and 'type_count' counts how many
+ * references exist of the current type. A change in type can only occur
+ * when type_count == 0.
+ */
+#define PG_type_mask (15<<24) /* bits 24-27 */
+#define PGT_none (0<<24) /* no special uses of this page */
+#define PGT_l1_page_table (1<<24) /* using this page as an L1 page table? */
+#define PGT_l2_page_table (2<<24) /* using this page as an L2 page table? */
+#define PGT_l3_page_table (3<<24) /* using this page as an L3 page table? */
+#define PGT_l4_page_table (4<<24) /* using this page as an L4 page table? */
+#define PGT_gdt_page (5<<24) /* using this page in a GDT? */
+#define PGT_ldt_page (6<<24) /* using this page in an LDT? */
+#define PGT_writeable_page (7<<24) /* has writable mappings of this page? */
+#define PGT_net_rx_buf (8<<24) /* this page has been pirated by the net code. */
+
+#define PageSlab(page) test_bit(PG_slab, &(page)->flags)
+#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags)
+#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags)
+
+#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \
+ do { \
+ (_pfn)->flags = (_dom) | PGT_writeable_page; \
+ (_pfn)->tot_count = (_pfn)->type_count = 1; \
+ } while ( 0 )
+
+#define UNSHARE_PFN(_pfn) \
+ (_pfn)->flags = (_pfn)->type_count = (_pfn)->tot_count = 0
+
+/* The array of struct pfn_info,
+ * free pfn list and number of free pfns in the free list
+ */
+extern frame_table_t * frame_table;
+extern unsigned long frame_table_size;
+extern struct list_head free_list;
+extern spinlock_t free_list_lock;
+extern unsigned int free_pfns;
+extern unsigned long max_page;
+void init_frametable(unsigned long nr_pages);
+
+/*
+ * The MPT (machine->physical mapping table) is an array of word-sized
+ * values, indexed on machine frame number. It is expected that guest OSes
+ * will use it to store a "physical" frame number to give the appearance of
+ * contiguous (or near contiguous) physical memory.
+ */
+#undef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
+
+/* Part of the domain API. */
+int do_process_page_updates(page_update_request_t *updates, int count);
+
+#define DEFAULT_GDT_ENTRIES ((FIRST_DOMAIN_GDT_ENTRY*8)-1)
+#define DEFAULT_GDT_ADDRESS ((unsigned long)gdt_table)
+
+#endif /* __XENO_MM_H__ */
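
A sketch of the type-exclusion rule stated in the comments above (the helper
is invented for illustration): a frame may take on a new PGT_* type only
once no references of its current type remain.

    #include <xeno/mm.h>

    static int can_retype_as_writeable(frame_table_t *page)
    {
        unsigned long type = page->flags & PG_type_mask;
        if (type == PGT_writeable_page)
            return 1;                      /* already the right type */
        return page_type_count(page) == 0; /* no refs of another type */
    }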
diff --git a/xen/include/xeno/module.h b/xen/include/xeno/module.h
new file mode 100644
index 0000000000..5e8ce698d6
--- /dev/null
+++ b/xen/include/xeno/module.h
@@ -0,0 +1,417 @@
+/*
+ * Dynamic loading of modules into the kernel.
+ *
+ * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
+ */
+
+#ifndef _LINUX_MODULE_H
+#define _LINUX_MODULE_H
+
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#ifdef __GENKSYMS__
+# define _set_ver(sym) sym
+# undef MODVERSIONS
+# define MODVERSIONS
+#else /* ! __GENKSYMS__ */
+# if !defined(MODVERSIONS) && defined(EXPORT_SYMTAB)
+# define _set_ver(sym) sym
+# include <linux/modversions.h>
+# endif
+#endif /* __GENKSYMS__ */
+
+#include <asm/atomic.h>
+
+/* Don't need to bring in all of uaccess.h just for this decl. */
+struct exception_table_entry;
+
+/* Used by get_kernel_syms, which is obsolete. */
+struct kernel_sym
+{
+ unsigned long value;
+ char name[60]; /* should have been 64-sizeof(long); oh well */
+};
+
+struct module_symbol
+{
+ unsigned long value;
+ const char *name;
+};
+
+struct module_ref
+{
+ struct module *dep; /* "parent" pointer */
+ struct module *ref; /* "child" pointer */
+ struct module_ref *next_ref;
+};
+
+/* TBD */
+struct module_persist;
+
+struct module
+{
+ unsigned long size_of_struct; /* == sizeof(module) */
+ struct module *next;
+ const char *name;
+ unsigned long size;
+
+ union
+ {
+ atomic_t usecount;
+ long pad;
+ } uc; /* Needs to keep its size - so says rth */
+
+ unsigned long flags; /* AUTOCLEAN et al */
+
+ unsigned nsyms;
+ unsigned ndeps;
+
+ struct module_symbol *syms;
+ struct module_ref *deps;
+ struct module_ref *refs;
+ int (*init)(void);
+ void (*cleanup)(void);
+ const struct exception_table_entry *ex_table_start;
+ const struct exception_table_entry *ex_table_end;
+#ifdef __alpha__
+ unsigned long gp;
+#endif
+ /* Members past this point are extensions to the basic
+ module support and are optional. Use mod_member_present()
+ to examine them. */
+ const struct module_persist *persist_start;
+ const struct module_persist *persist_end;
+ int (*can_unload)(void);
+ int runsize; /* In modutils, not currently used */
+ const char *kallsyms_start; /* All symbols for kernel debugging */
+ const char *kallsyms_end;
+ const char *archdata_start; /* arch specific data for module */
+ const char *archdata_end;
+ const char *kernel_data; /* Reserved for kernel internal use */
+};
+
+struct module_info
+{
+ unsigned long addr;
+ unsigned long size;
+ unsigned long flags;
+ long usecount;
+};
+
+/* Bits of module.flags. */
+
+#define MOD_UNINITIALIZED 0
+#define MOD_RUNNING 1
+#define MOD_DELETED 2
+#define MOD_AUTOCLEAN 4
+#define MOD_VISITED 8
+#define MOD_USED_ONCE 16
+#define MOD_JUST_FREED 32
+#define MOD_INITIALIZING 64
+
+/* Values for query_module's which. */
+
+#define QM_MODULES 1
+#define QM_DEPS 2
+#define QM_REFS 3
+#define QM_SYMBOLS 4
+#define QM_INFO 5
+
+/* Can the module be queried? */
+#define MOD_CAN_QUERY(mod) (((mod)->flags & (MOD_RUNNING | MOD_INITIALIZING)) && !((mod)->flags & MOD_DELETED))
+
+/* When struct module is extended, we must test whether the new member
+ is present in the header received from insmod before we can use it.
+   This macro returns true if the member is present. */
+
+#define mod_member_present(mod,member) \
+ ((unsigned long)(&((struct module *)0L)->member + 1) \
+ <= (mod)->size_of_struct)
+
+/*
+ * Ditto for archdata. Assumes mod->archdata_start and mod->archdata_end
+ * are validated elsewhere.
+ */
+#define mod_archdata_member_present(mod, type, member) \
+ (((unsigned long)(&((type *)0L)->member) + \
+ sizeof(((type *)0L)->member)) <= \
+ ((mod)->archdata_end - (mod)->archdata_start))
+
+
+/* Check if an address p with number of entries n is within the body of module m */
+#define mod_bound(p, n, m) ((unsigned long)(p) >= ((unsigned long)(m) + ((m)->size_of_struct)) && \
+ (unsigned long)((p)+(n)) <= (unsigned long)(m) + (m)->size)
+
+/* Backwards compatibility definition. */
+
+#define GET_USE_COUNT(module) (atomic_read(&(module)->uc.usecount))
+
+/* Poke the use count of a module. */
+
+#define __MOD_INC_USE_COUNT(mod) \
+ (atomic_inc(&(mod)->uc.usecount), (mod)->flags |= MOD_VISITED|MOD_USED_ONCE)
+#define __MOD_DEC_USE_COUNT(mod) \
+ (atomic_dec(&(mod)->uc.usecount), (mod)->flags |= MOD_VISITED)
+#define __MOD_IN_USE(mod) \
+ (mod_member_present((mod), can_unload) && (mod)->can_unload \
+ ? (mod)->can_unload() : atomic_read(&(mod)->uc.usecount))
+
+/* Indirect stringification. */
+
+#define __MODULE_STRING_1(x) #x
+#define __MODULE_STRING(x) __MODULE_STRING_1(x)
+
+/* Generic inter module communication.
+ *
+ * NOTE: This interface is intended for small amounts of data that are
+ * passed between two objects and either or both of the objects
+ * might be compiled as modules. Do not overuse this interface.
+ *
+ * If more than two objects need to communicate then you probably
+ * need a specific interface instead of abusing this generic
+ * interface. If both objects are *always* built into the kernel
+ * then a global extern variable is good enough, you do not need
+ * this interface.
+ *
+ * Keith Owens <kaos@ocs.com.au> 28 Oct 2000.
+ */
+
+#ifdef __KERNEL__
+#define HAVE_INTER_MODULE
+extern void inter_module_register(const char *, struct module *, const void *);
+extern void inter_module_unregister(const char *);
+extern const void *inter_module_get(const char *);
+extern const void *inter_module_get_request(const char *, const char *);
+extern void inter_module_put(const char *);
+
+struct inter_module_entry {
+ struct list_head list;
+ const char *im_name;
+ struct module *owner;
+ const void *userdata;
+};
+
+#if 0
+extern int try_inc_mod_count(struct module *mod);
+#else
+static inline int try_inc_mod_count(struct module * mod)
+{
+ if ( mod ) __MOD_INC_USE_COUNT(mod);
+ return 1;
+}
+#endif
+#endif /* __KERNEL__ */
+
+#if defined(MODULE) && !defined(__GENKSYMS__)
+
+/* Embedded module documentation macros. */
+
+/* For documentation purposes only. */
+
+#define MODULE_AUTHOR(name) \
+const char __module_author[] __attribute__((section(".modinfo"))) = \
+"author=" name
+
+#define MODULE_DESCRIPTION(desc) \
+const char __module_description[] __attribute__((section(".modinfo"))) = \
+"description=" desc
+
+/* Could potentially be used by kmod... */
+
+#define MODULE_SUPPORTED_DEVICE(dev) \
+const char __module_device[] __attribute__((section(".modinfo"))) = \
+"device=" dev
+
+/* Used to verify parameters given to the module. The TYPE arg should
+ be a string in the following format:
+ [min[-max]]{b,h,i,l,s}
+ The MIN and MAX specifiers delimit the length of the array. If MAX
+ is omitted, it defaults to MIN; if both are omitted, the default is 1.
+ The final character is a type specifier:
+ b byte
+ h short
+ i int
+ l long
+ s string
+*/
+
+#define MODULE_PARM(var,type) \
+const char __module_parm_##var[] \
+__attribute__((section(".modinfo"))) = \
+"parm_" __MODULE_STRING(var) "=" type
+
+#define MODULE_PARM_DESC(var,desc) \
+const char __module_parm_desc_##var[] \
+__attribute__((section(".modinfo"))) = \
+"parm_desc_" __MODULE_STRING(var) "=" desc
+
+/*
+ * MODULE_DEVICE_TABLE exports information about devices
+ * currently supported by this module. A device type, such as PCI,
+ * is a C-like identifier passed as the first arg to this macro.
+ * The second macro arg is the variable containing the device
+ * information being made public.
+ *
+ * The following is a list of known device types (arg 1),
+ * and the C types which are to be passed as arg 2.
+ * pci - struct pci_device_id - List of PCI ids supported by this module
+ * isapnp - struct isapnp_device_id - List of ISA PnP ids supported by this module
+ * usb - struct usb_device_id - List of USB ids supported by this module
+ */
+#define MODULE_GENERIC_TABLE(gtype,name) \
+static const unsigned long __module_##gtype##_size \
+ __attribute__ ((unused)) = sizeof(struct gtype##_id); \
+static const struct gtype##_id * __module_##gtype##_table \
+ __attribute__ ((unused)) = name
+
+/*
+ * The following license idents are currently accepted as indicating free
+ * software modules
+ *
+ * "GPL" [GNU Public License v2 or later]
+ * "GPL and additional rights" [GNU Public License v2 rights and more]
+ * "Dual BSD/GPL" [GNU Public License v2 or BSD license choice]
+ * "Dual MPL/GPL" [GNU Public License v2 or Mozilla license choice]
+ *
+ * The following other idents are available
+ *
+ * "Proprietary" [Non free products]
+ *
+ * There are dual licensed components, but when running with Linux it is the
+ * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL
+ * is a GPL combined work.
+ *
+ * This exists for several reasons
+ * 1. So modinfo can show license info for users wanting to vet their setup
+ * is free
+ * 2. So the community can ignore bug reports including proprietary modules
+ * 3. So vendors can do likewise based on their own policies
+ */
+
+#define MODULE_LICENSE(license) \
+static const char __module_license[] __attribute__((section(".modinfo"))) = \
+"license=" license
+
+/* Define the module variable, and usage macros. */
+extern struct module __this_module;
+
+#define THIS_MODULE (&__this_module)
+#define MOD_INC_USE_COUNT __MOD_INC_USE_COUNT(THIS_MODULE)
+#define MOD_DEC_USE_COUNT __MOD_DEC_USE_COUNT(THIS_MODULE)
+#define MOD_IN_USE __MOD_IN_USE(THIS_MODULE)
+
+#if 0
+#include <linux/version.h>
+static const char __module_kernel_version[] __attribute__((section(".modinfo"))) =
+"kernel_version=" UTS_RELEASE;
+#ifdef MODVERSIONS
+static const char __module_using_checksums[] __attribute__((section(".modinfo"))) =
+"using_checksums=1";
+#endif
+#endif
+
+#else /* MODULE */
+
+#define MODULE_AUTHOR(name)
+#define MODULE_LICENSE(license)
+#define MODULE_DESCRIPTION(desc)
+#define MODULE_SUPPORTED_DEVICE(name)
+#define MODULE_PARM(var,type)
+#define MODULE_PARM_DESC(var,desc)
+
+/* Create a dummy reference to the table to suppress gcc unused warnings. Put
+ * the reference in the .data.exit section which is discarded when code is built
+ * in, so the reference does not bloat the running kernel. Note: it cannot be
+ * const; other exit data may be writable.
+ */
+#define MODULE_GENERIC_TABLE(gtype,name) \
+static const struct gtype##_id * __module_##gtype##_table \
+ __attribute__ ((unused, __section__(".data.exit"))) = name
+
+#ifndef __GENKSYMS__
+
+#define THIS_MODULE NULL
+#define MOD_INC_USE_COUNT do { } while (0)
+#define MOD_DEC_USE_COUNT do { } while (0)
+#define MOD_IN_USE 1
+
+extern struct module *module_list;
+
+#endif /* !__GENKSYMS__ */
+
+#endif /* MODULE */
+
+#define MODULE_DEVICE_TABLE(type,name) \
+ MODULE_GENERIC_TABLE(type##_device,name)
+
+/* Export a symbol either from the kernel or a module.
+
+ In the kernel, the symbol is added to the kernel's global symbol table.
+
+ In a module, it controls which variables are exported. If no
+ variables are explicitly exported, the action is controled by the
+ insmod -[xX] flags. Otherwise, only the variables listed are exported.
+ This obviates the need for the old register_symtab() function. */
+
+#if defined(__GENKSYMS__)
+
+/* We want the EXPORT_SYMBOL tag left intact for recognition. */
+
+#elif !defined(CONFIG_MODULES)
+
+#define __EXPORT_SYMBOL(sym,str)
+#define EXPORT_SYMBOL(var)
+#define EXPORT_SYMBOL_NOVERS(var)
+#define EXPORT_SYMBOL_GPL(var)
+
+#elif !defined(EXPORT_SYMTAB)
+
+#define __EXPORT_SYMBOL(sym,str) error this_object_must_be_defined_as_export_objs_in_the_Makefile
+#define EXPORT_SYMBOL(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile
+#define EXPORT_SYMBOL_NOVERS(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile
+#define EXPORT_SYMBOL_GPL(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile
+
+#else
+
+#define __EXPORT_SYMBOL(sym, str) \
+const char __kstrtab_##sym[] \
+__attribute__((section(".kstrtab"))) = str; \
+const struct module_symbol __ksymtab_##sym \
+__attribute__((section("__ksymtab"))) = \
+{ (unsigned long)&sym, __kstrtab_##sym }
+
+#define __EXPORT_SYMBOL_GPL(sym, str) \
+const char __kstrtab_##sym[] \
+__attribute__((section(".kstrtab"))) = "GPLONLY_" str; \
+const struct module_symbol __ksymtab_##sym \
+__attribute__((section("__ksymtab"))) = \
+{ (unsigned long)&sym, __kstrtab_##sym }
+
+#if defined(MODVERSIONS) || !defined(CONFIG_MODVERSIONS)
+#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var))
+#define EXPORT_SYMBOL_GPL(var) __EXPORT_SYMBOL_GPL(var, __MODULE_STRING(var))
+#else
+#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var)))
+#define EXPORT_SYMBOL_GPL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var)))
+#endif
+
+#define EXPORT_SYMBOL_NOVERS(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var))
+
+#endif /* __GENKSYMS__ */
+
+#ifdef MODULE
+/* Force a module to export no symbols. */
+#define EXPORT_NO_SYMBOLS __asm__(".section __ksymtab\n.previous")
+#else
+#define EXPORT_NO_SYMBOLS
+#endif /* MODULE */
+
+#ifdef CONFIG_MODULES
+#define SET_MODULE_OWNER(some_struct) do { (some_struct)->owner = THIS_MODULE; } while (0)
+#else
+#define SET_MODULE_OWNER(some_struct) do { } while (0)
+#endif
+
+#endif /* _LINUX_MODULE_H */
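
The mod_member_present() test written out as a function, essentially
__MOD_IN_USE unrolled (a sketch, not from the patch): an older insmod may
hand over a struct module shorter than ours, so the optional can_unload
member must be size-checked before use.

    static int module_in_use(struct module *mod)
    {
        if (mod_member_present(mod, can_unload) && mod->can_unload)
            return mod->can_unload();
        return atomic_read(&mod->uc.usecount);
    }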
diff --git a/xen/include/xeno/multiboot.h b/xen/include/xeno/multiboot.h
new file mode 100644
index 0000000000..a61117bbbd
--- /dev/null
+++ b/xen/include/xeno/multiboot.h
@@ -0,0 +1,81 @@
+/* multiboot.h - the header for Multiboot */
+/* Copyright (C) 1999, 2001 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __ELF__
+#error "Build on a 32-bit ELF system"
+#endif
+
+/* The magic number passed by a Multiboot-compliant boot loader. */
+#define MULTIBOOT_BOOTLOADER_MAGIC 0x2BADB002
+
+/* The symbol table for a.out. */
+typedef struct aout_symbol_table
+{
+ unsigned long tabsize;
+ unsigned long strsize;
+ unsigned long addr;
+ unsigned long reserved;
+} aout_symbol_table_t;
+
+/* The section header table for ELF. */
+typedef struct elf_section_header_table
+{
+ unsigned long num;
+ unsigned long size;
+ unsigned long addr;
+ unsigned long shndx;
+} elf_section_header_table_t;
+
+/* The Multiboot information. */
+typedef struct multiboot_info
+{
+ unsigned long flags;
+ unsigned long mem_lower;
+ unsigned long mem_upper;
+ unsigned long boot_device;
+ unsigned long cmdline;
+ unsigned long mods_count;
+ unsigned long mods_addr;
+ union
+ {
+ aout_symbol_table_t aout_sym;
+ elf_section_header_table_t elf_sec;
+ } u;
+ unsigned long mmap_length;
+ unsigned long mmap_addr;
+} multiboot_info_t;
+
+/* The module structure. */
+typedef struct module
+{
+ unsigned long mod_start;
+ unsigned long mod_end;
+ unsigned long string;
+ unsigned long reserved;
+} module_t;
+
+/* The memory map. Be careful: entry offsets are counted from
+   base_addr_low (offset 0), so the size member does not count itself. */
+typedef struct memory_map
+{
+ unsigned long size;
+ unsigned long base_addr_low;
+ unsigned long base_addr_high;
+ unsigned long length_low;
+ unsigned long length_high;
+ unsigned long type;
+} memory_map_t;
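
The map is therefore walked with a stride of size + sizeof(size). A sketch
of the standard traversal (illustrative; it assumes mbi->flags already marks
the memory map as valid):

    static void walk_mmap(multiboot_info_t *mbi)
    {
        memory_map_t *map = (memory_map_t *)mbi->mmap_addr;
        while ((unsigned long)map < mbi->mmap_addr + mbi->mmap_length) {
            /* use map->base_addr_*, map->length_*, map->type here */
            map = (memory_map_t *)((unsigned long)map
                                   + map->size + sizeof(map->size));
        }
    }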
diff --git a/xen/include/xeno/netdevice.h b/xen/include/xeno/netdevice.h
new file mode 100644
index 0000000000..0d7c4c5606
--- /dev/null
+++ b/xen/include/xeno/netdevice.h
@@ -0,0 +1,604 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions for the Interfaces handler.
+ *
+ * Version: @(#)dev.h 1.0.10 08/12/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Corey Minyard <wf-rch!minyard@relay.EU.net>
+ * Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
+ * Alan Cox, <Alan.Cox@linux.org>
+ * Bjorn Ekwall. <bj0rn@blox.se>
+ * Pekka Riikonen <priikone@poseidon.pspt.fi>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Moved to /usr/include/linux for NET3
+ */
+#ifndef _LINUX_NETDEVICE_H
+#define _LINUX_NETDEVICE_H
+
+#include <xeno/if.h>
+#include <xeno/if_ether.h>
+#include <xeno/if_packet.h>
+#include <xeno/sched.h>
+#include <xeno/interrupt.h>
+
+#include <asm/atomic.h>
+#include <asm/cache.h>
+#include <asm/byteorder.h>
+
+#ifdef __KERNEL__
+#include <xeno/config.h>
+
+struct vlan_group;
+
+/* Backlog congestion levels */
+#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
+#define NET_RX_DROP 1 /* packet dropped */
+
+#endif
+
+#define MAX_ADDR_LEN 8 /* Largest hardware address length */
+
+/*
+ * Network device statistics. Akin to the 2.0 ether stats but
+ * with byte counters.
+ */
+
+struct net_device_stats
+{
+ unsigned long rx_packets; /* total packets received */
+ unsigned long tx_packets; /* total packets transmitted */
+ unsigned long rx_bytes; /* total bytes received */
+ unsigned long tx_bytes; /* total bytes transmitted */
+ unsigned long rx_errors; /* bad packets received */
+ unsigned long tx_errors; /* packet transmit problems */
+ unsigned long rx_dropped; /* no space in linux buffers */
+ unsigned long tx_dropped; /* no space available in linux */
+ unsigned long multicast; /* multicast packets received */
+ unsigned long collisions;
+
+ /* detailed rx_errors: */
+ unsigned long rx_length_errors;
+ unsigned long rx_over_errors; /* receiver ring buff overflow */
+ unsigned long rx_crc_errors; /* recved pkt with crc error */
+ unsigned long rx_frame_errors; /* recv'd frame alignment error */
+ unsigned long rx_fifo_errors; /* recv'r fifo overrun */
+ unsigned long rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ unsigned long tx_aborted_errors;
+ unsigned long tx_carrier_errors;
+ unsigned long tx_fifo_errors;
+ unsigned long tx_heartbeat_errors;
+ unsigned long tx_window_errors;
+
+ /* for cslip etc */
+ unsigned long rx_compressed;
+ unsigned long tx_compressed;
+};
+
+
+/* Media selection options. */
+enum {
+ IF_PORT_UNKNOWN = 0,
+ IF_PORT_10BASE2,
+ IF_PORT_10BASET,
+ IF_PORT_AUI,
+ IF_PORT_100BASET,
+ IF_PORT_100BASETX,
+ IF_PORT_100BASEFX
+};
+
+#ifdef __KERNEL__
+
+extern const char *if_port_text[];
+
+#include <xeno/cache.h>
+#include <xeno/skbuff.h>
+
+struct neighbour;
+struct neigh_parms;
+struct sk_buff;
+
+struct netif_rx_stats
+{
+ unsigned total;
+ unsigned dropped;
+ unsigned time_squeeze;
+ unsigned throttled;
+ unsigned fastroute_hit;
+ unsigned fastroute_success;
+ unsigned fastroute_defer;
+ unsigned fastroute_deferred_out;
+ unsigned fastroute_latency_reduction;
+ unsigned cpu_collision;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct netif_rx_stats netdev_rx_stat[];
+
+
+/*
+ * We tag multicasts with these structures.
+ */
+
+struct dev_mc_list
+{
+ struct dev_mc_list *next;
+ __u8 dmi_addr[MAX_ADDR_LEN];
+ unsigned char dmi_addrlen;
+ int dmi_users;
+ int dmi_gusers;
+};
+
+struct hh_cache
+{
+ struct hh_cache *hh_next; /* Next entry */
+ atomic_t hh_refcnt; /* number of users */
+ unsigned short hh_type; /* protocol identifier, e.g. ETH_P_IP
+ * NOTE: For VLANs, this will be the
+ * encapsulated type. --BLG
+ */
+ int hh_len; /* length of header */
+ int (*hh_output)(struct sk_buff *skb);
+ rwlock_t hh_lock;
+ /* cached hardware header; allow for machine alignment needs. */
+ unsigned long hh_data[16/sizeof(unsigned long)];
+};
+
+/* These flag bits are private to the generic network queueing
+ * layer, they may not be explicitly referenced by any other
+ * code.
+ */
+
+enum netdev_state_t
+{
+ __LINK_STATE_XOFF=0,
+ __LINK_STATE_START,
+ __LINK_STATE_PRESENT,
+ __LINK_STATE_NOCARRIER
+};
+
+
+/*
+ * The DEVICE structure.
+ * Actually, this whole structure is a big mistake. It mixes I/O
+ * data with strictly "high-level" data, and it has to know about
+ * almost every data structure used in the INET module.
+ *
+ * FIXME: cleanup struct net_device such that network protocol info
+ * moves out.
+ */
+
+struct net_device
+{
+ /*
+ * This is the first field of the "visible" part of this structure
+ * (i.e. as seen by users in the "Space.c" file). It is the name
+ * the interface.
+ */
+ char name[IFNAMSIZ];
+
+ /*
+ * I/O specific fields
+ * FIXME: Merge these and struct ifmap into one
+ */
+ unsigned long rmem_end; /* shmem "recv" end */
+ unsigned long rmem_start; /* shmem "recv" start */
+ unsigned long mem_end; /* shared mem end */
+ unsigned long mem_start; /* shared mem start */
+ unsigned long base_addr; /* device I/O address */
+ unsigned int irq; /* device IRQ number */
+
+ /*
+ * Some hardware also needs these fields, but they are not
+ * part of the usual set specified in Space.c.
+ */
+
+ unsigned char if_port; /* Selectable AUI, TP,..*/
+ unsigned char dma; /* DMA channel */
+
+ unsigned long state;
+
+ struct net_device *next;
+
+ /* The device initialization function. Called only once. */
+ int (*init)(struct net_device *dev);
+
+ /* ------- Fields preinitialized in Space.c finish here ------- */
+
+ struct net_device *next_sched;
+
+ /* Interface index. Unique device identifier */
+ int ifindex;
+ int iflink;
+
+
+ struct net_device_stats* (*get_stats)(struct net_device *dev);
+ struct iw_statistics* (*get_wireless_stats)(struct net_device *dev);
+
+ /*
+ * This marks the end of the "visible" part of the structure. All
+ * fields hereafter are internal to the system, and may change at
+ * will (read: may be cleaned up at will).
+ */
+
+ /* These may be needed for future network-power-down code. */
+ unsigned long trans_start; /* Time (in jiffies) of last Tx */
+ unsigned long last_rx; /* Time of last Rx */
+
+ unsigned short flags; /* interface flags (a la BSD) */
+ unsigned short gflags;
+ unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */
+ unsigned short unused_alignment_fixer; /* Because we need priv_flags,
+ * and we want to be 32-bit aligned.
+ */
+
+ unsigned mtu; /* interface MTU value */
+ unsigned short type; /* interface hardware type */
+ unsigned short hard_header_len; /* hardware hdr length */
+ void *priv; /* pointer to private data */
+
+ struct net_device *master; /* Pointer to master device of a group,
+ * which this device is member of.
+ */
+
+ /* Interface address info. */
+ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast addr */
+ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */
+ unsigned char addr_len; /* hardware address length */
+
+ struct dev_mc_list *mc_list; /* Multicast mac addresses */
+ int mc_count; /* Number of installed mcasts */
+ int promiscuity;
+ int allmulti;
+
+ int watchdog_timeo;
+ struct timer_list watchdog_timer;
+
+ /* Protocol specific pointers */
+
+ void *atalk_ptr; /* AppleTalk link */
+ void *ip_ptr; /* IPv4 specific data */
+ void *dn_ptr; /* DECnet specific data */
+ void *ip6_ptr; /* IPv6 specific data */
+ void *ec_ptr; /* Econet specific data */
+
+ /* hard_start_xmit synchronizer */
+ spinlock_t xmit_lock;
+ /* cpu id of the processor currently inside hard_start_xmit,
+ or -1 if nobody is.
+ */
+ int xmit_lock_owner;
+ /* device queue lock */
+ spinlock_t queue_lock;
+ /* Number of references to this device */
+ atomic_t refcnt;
+ /* The flag marking that the device is unregistered but still held by a user */
+ int deadbeaf;
+
+ /* Net device features */
+ int features;
+#define NETIF_F_SG 1 /* Scatter/gather IO. */
+#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */
+#define NETIF_F_NO_CSUM 4 /* Does not require checksum, e.g. loopback. */
+#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
+#define NETIF_F_DYNALLOC 16 /* Self-destructible device. */
+#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
+#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
+#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
+#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
+
+ /* Called after device is detached from network. */
+ void (*uninit)(struct net_device *dev);
+ /* Called after last user reference disappears. */
+ void (*destructor)(struct net_device *dev);
+
+ /* Pointers to interface service routines. */
+ int (*open)(struct net_device *dev);
+ int (*stop)(struct net_device *dev);
+ int (*hard_start_xmit) (struct sk_buff *skb,
+ struct net_device *dev);
+#if 0
+ int (*poll) (struct net_device *dev, int *quota); /* XXX IAP */
+#endif
+ int (*hard_header) (struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned short type,
+ void *daddr,
+ void *saddr,
+ unsigned len);
+ int (*rebuild_header)(struct sk_buff *skb);
+#define HAVE_MULTICAST
+ void (*set_multicast_list)(struct net_device *dev);
+#define HAVE_SET_MAC_ADDR
+ int (*set_mac_address)(struct net_device *dev,
+ void *addr);
+#define HAVE_PRIVATE_IOCTL
+ int (*do_ioctl)(struct net_device *dev,
+ struct ifreq *ifr, int cmd);
+#define HAVE_SET_CONFIG
+ int (*set_config)(struct net_device *dev,
+ struct ifmap *map);
+#define HAVE_HEADER_CACHE
+ int (*hard_header_cache)(struct neighbour *neigh,
+ struct hh_cache *hh);
+ void (*header_cache_update)(struct hh_cache *hh,
+ struct net_device *dev,
+ unsigned char * haddr);
+#define HAVE_CHANGE_MTU
+ int (*change_mtu)(struct net_device *dev, int new_mtu);
+
+#define HAVE_TX_TIMEOUT
+ void (*tx_timeout) (struct net_device *dev);
+
+ void (*vlan_rx_register)(struct net_device *dev,
+ struct vlan_group *grp);
+ void (*vlan_rx_add_vid)(struct net_device *dev,
+ unsigned short vid);
+ void (*vlan_rx_kill_vid)(struct net_device *dev,
+ unsigned short vid);
+
+ int (*hard_header_parse)(struct sk_buff *skb,
+ unsigned char *haddr);
+ int (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
+// int (*accept_fastpath)(struct net_device *, struct dst_entry*);
+
+ /* open/release and usage marking */
+ struct module *owner;
+
+ /* bridge stuff */
+ struct net_bridge_port *br_port;
+};
+
+
+struct packet_type
+{
+ unsigned short type; /* This is really htons(ether_type). */
+ struct net_device *dev; /* NULL is wildcarded here */
+ int (*func) (struct sk_buff *, struct net_device *,
+ struct packet_type *);
+ void *data; /* Private to the packet type */
+ struct packet_type *next;
+};
+
+
+#include <xeno/interrupt.h>
+
+extern struct net_device *dev_base; /* All devices */
+extern rwlock_t dev_base_lock; /* Device list lock */
+
+extern int netdev_boot_setup_add(char *name, struct ifmap *map);
+extern int netdev_boot_setup_check(struct net_device *dev);
+extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
+extern void dev_add_pack(struct packet_type *pt);
+extern void dev_remove_pack(struct packet_type *pt);
+extern int dev_get(const char *name);
+extern struct net_device *dev_get_by_name(const char *name);
+extern struct net_device *__dev_get_by_name(const char *name);
+extern struct net_device *dev_alloc(const char *name, int *err);
+extern int dev_alloc_name(struct net_device *dev, const char *name);
+extern int dev_open(struct net_device *dev);
+extern int dev_close(struct net_device *dev);
+extern int register_netdevice(struct net_device *dev);
+extern int unregister_netdevice(struct net_device *dev);
+extern void dev_shutdown(struct net_device *dev);
+extern void dev_activate(struct net_device *dev);
+extern void dev_deactivate(struct net_device *dev);
+extern void dev_init_scheduler(struct net_device *dev);
+extern int dev_new_index(void);
+extern struct net_device *dev_get_by_index(int ifindex);
+extern struct net_device *__dev_get_by_index(int ifindex);
+extern int dev_restart(struct net_device *dev);
+
+typedef int gifconf_func_t(struct net_device * dev, char * bufptr, int len);
+extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf);
+static inline int unregister_gifconf(unsigned int family)
+{
+ return register_gifconf(family, 0);
+}
+
+extern struct tasklet_struct net_tx_tasklet;
+
+extern struct list_head net_schedule_list;
+extern spinlock_t net_schedule_list_lock;
+
+#define HAVE_NETIF_QUEUE
+
+static inline void __netif_schedule(struct net_device *dev)
+{
+ tasklet_schedule(&net_tx_tasklet);
+}
+
+static inline void netif_schedule(struct net_device *dev)
+{
+ if (!test_bit(__LINK_STATE_XOFF, &dev->state))
+ __netif_schedule(dev);
+}
+
+static inline void netif_start_queue(struct net_device *dev)
+{
+ clear_bit(__LINK_STATE_XOFF, &dev->state);
+}
+
+static inline void netif_wake_queue(struct net_device *dev)
+{
+ if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
+ __netif_schedule(dev);
+}
+
+static inline void netif_stop_queue(struct net_device *dev)
+{
+ set_bit(__LINK_STATE_XOFF, &dev->state);
+}
+
+static inline int netif_queue_stopped(struct net_device *dev)
+{
+ return test_bit(__LINK_STATE_XOFF, &dev->state);
+}
+
+static inline int netif_running(struct net_device *dev)
+{
+ return test_bit(__LINK_STATE_START, &dev->state);
+}
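
Together these inlines give drivers a flow-control protocol around the
__LINK_STATE_XOFF bit: stop the queue when the hardware TX ring fills, wake it
from the completion path. A sketch of the usual pattern, with TX_RING_FULL()
and TX_RING_HAS_ROOM() standing in for driver-specific ring checks:

    static int my_start_xmit(struct sk_buff *skb, struct net_device *dev)
    {
        /* ... post skb to the hardware TX ring ... */
        if (TX_RING_FULL(dev))
            netif_stop_queue(dev);        /* sets __LINK_STATE_XOFF */
        return 0;
    }

    static void my_tx_complete(struct net_device *dev)
    {
        /* ... reap finished descriptors ... */
        if (TX_RING_HAS_ROOM(dev))
            netif_wake_queue(dev);        /* clears XOFF, reschedules TX */
    }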
+
+
+/*
+ * Xen does not need deferred skb freeing, as all destructor hook functions
+ * are IRQ safe. Linux needed more care for some destructors...
+ */
+#define dev_kfree_skb_irq(_skb) dev_kfree_skb(_skb)
+#define dev_kfree_skb_any(_skb) dev_kfree_skb(_skb)
+
+extern void net_call_rx_atomic(void (*fn)(void));
+extern int netif_rx(struct sk_buff *skb);
+extern int dev_ioctl(unsigned int cmd, void *);
+extern int dev_change_flags(struct net_device *, unsigned);
+extern void dev_init(void);
+
+extern int netdev_nit;
+
+/* Post a buffer to the network code from _non-interrupt_ context.
+ * See net/core/dev.c for the netif_rx description.
+ */
+static inline int netif_rx_ni(struct sk_buff *skb)
+{
+ int err = netif_rx(skb);
+ if (softirq_pending(smp_processor_id()))
+ do_softirq();
+ return err;
+}
+
+extern int netdev_finish_unregister(struct net_device *dev);
+
+static inline void dev_put(struct net_device *dev)
+{
+ if (atomic_dec_and_test(&dev->refcnt))
+ netdev_finish_unregister(dev);
+}
+
+#define __dev_put(dev) atomic_dec(&(dev)->refcnt)
+#define dev_hold(dev) atomic_inc(&(dev)->refcnt)
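
The rule is the usual one: take a reference while a pointer to the device may
be kept across a blocking or asynchronous operation, and drop it when done;
the final dev_put() is what lets netdev_finish_unregister() run. For instance
(assuming dev_get_by_name() returns the device with a reference held, per the
lookup functions above):

    struct net_device *dev = dev_get_by_name("eth0");
    if (dev != NULL) {
        /* ... use dev ... */
        dev_put(dev);   /* may complete a pending unregister */
    }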
+
+/* Carrier loss detection, dial on demand. The functions netif_carrier_on
+ * and _off may be called from IRQ context, but it is the caller
+ * who is responsible for serialization of these calls.
+ */
+
+static inline int netif_carrier_ok(struct net_device *dev)
+{
+ return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
+}
+
+extern void __netdev_watchdog_up(struct net_device *dev);
+
+static inline void netif_carrier_on(struct net_device *dev)
+{
+ clear_bit(__LINK_STATE_NOCARRIER, &dev->state);
+ if (netif_running(dev))
+ __netdev_watchdog_up(dev);
+}
+
+static inline void netif_carrier_off(struct net_device *dev)
+{
+ set_bit(__LINK_STATE_NOCARRIER, &dev->state);
+}
+
+/* Hot-plugging. */
+static inline int netif_device_present(struct net_device *dev)
+{
+ return test_bit(__LINK_STATE_PRESENT, &dev->state);
+}
+
+static inline void netif_device_detach(struct net_device *dev)
+{
+ if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_stop_queue(dev);
+ }
+}
+
+static inline void netif_device_attach(struct net_device *dev)
+{
+ if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_wake_queue(dev);
+ __netdev_watchdog_up(dev);
+ }
+}
+
+/*
+ * Network interface message level settings
+ */
+#define HAVE_NETIF_MSG 1
+
+enum {
+ NETIF_MSG_DRV = 0x0001,
+ NETIF_MSG_PROBE = 0x0002,
+ NETIF_MSG_LINK = 0x0004,
+ NETIF_MSG_TIMER = 0x0008,
+ NETIF_MSG_IFDOWN = 0x0010,
+ NETIF_MSG_IFUP = 0x0020,
+ NETIF_MSG_RX_ERR = 0x0040,
+ NETIF_MSG_TX_ERR = 0x0080,
+ NETIF_MSG_TX_QUEUED = 0x0100,
+ NETIF_MSG_INTR = 0x0200,
+ NETIF_MSG_TX_DONE = 0x0400,
+ NETIF_MSG_RX_STATUS = 0x0800,
+ NETIF_MSG_PKTDATA = 0x1000,
+};
+
+#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV)
+#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE)
+#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK)
+#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER)
+#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN)
+#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP)
+#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR)
+#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR)
+#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED)
+#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR)
+#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE)
+#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS)
+#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA)
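
A driver keeps an OR of the NETIF_MSG_* flags in a msg_enable field of its
private state and gates its printk()s on the macros above. A sketch, assuming
a hypothetical per-device struct hung off the usual dev->priv pointer:

    struct my_priv {
        u32 msg_enable;   /* OR of NETIF_MSG_* flags */
        /* ... */
    };

    static void my_link_change(struct net_device *dev, int up)
    {
        struct my_priv *p = (struct my_priv *)dev->priv;

        if (netif_msg_link(p))
            printk(KERN_INFO "%s: link %s\n", dev->name, up ? "up" : "down");
    }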
+
+/* These functions live elsewhere (mostly in drivers/net/net_init.c) but are related */
+
+extern void ether_setup(struct net_device *dev);
+extern void fddi_setup(struct net_device *dev);
+extern void tr_setup(struct net_device *dev);
+extern void fc_setup(struct net_device *dev);
+extern void fc_freedev(struct net_device *dev);
+/* Support for loadable net-drivers */
+extern int register_netdev(struct net_device *dev);
+extern void unregister_netdev(struct net_device *dev);
+/* Functions used for multicast support */
+extern void dev_mc_upload(struct net_device *dev);
+extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
+extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
+extern void dev_mc_discard(struct net_device *dev);
+extern void dev_set_promiscuity(struct net_device *dev, int inc);
+extern void dev_set_allmulti(struct net_device *dev, int inc);
+extern void netdev_state_change(struct net_device *dev);
+/* Load a device via the kmod */
+extern void dev_load(const char *name);
+extern void dev_mcast_init(void);
+extern int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev));
+extern void netdev_unregister_fc(int bit);
+extern unsigned long netdev_fc_xoff;
+extern int netdev_set_master(struct net_device *dev, struct net_device *master);
+extern struct sk_buff * skb_checksum_help(struct sk_buff *skb);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_DEV_H */
diff --git a/xen/include/xeno/notifier.h b/xen/include/xeno/notifier.h
new file mode 100644
index 0000000000..0db9736c11
--- /dev/null
+++ b/xen/include/xeno/notifier.h
@@ -0,0 +1,64 @@
+/*
+ * Routines to manage notifier chains for passing status changes to any
+ * interested routines. We need this instead of hard coded call lists so
+ * that modules can poke their nose into the innards. The network devices
+ * needed them so here they are for the rest of you.
+ *
+ * Alan Cox <Alan.Cox@linux.org>
+ */
+
+#ifndef _LINUX_NOTIFIER_H
+#define _LINUX_NOTIFIER_H
+#include <linux/errno.h>
+
+struct notifier_block
+{
+ int (*notifier_call)(struct notifier_block *self, unsigned long, void *);
+ struct notifier_block *next;
+ int priority;
+};
+
+
+#ifdef __KERNEL__
+
+extern int notifier_chain_register(struct notifier_block **list, struct notifier_block *n);
+extern int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n);
+extern int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v);
+
+#define NOTIFY_DONE 0x0000 /* Don't care */
+#define NOTIFY_OK 0x0001 /* Suits me */
+#define NOTIFY_STOP_MASK 0x8000 /* Don't call further */
+#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */
+
+/*
+ * Declared notifiers so far. I can imagine quite a few more chains
+ * over time (eg laptop power reset chains, reboot chain (to clean
+ * device units up), device [un]mount chain, module load/unload chain,
+ * low memory chain, screenblank chain (for plug in modular screenblankers)
+ * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
+ */
+
+/* netdevice notifier chain */
+#define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */
+#define NETDEV_DOWN 0x0002
+#define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface
+ detected a hardware crash and restarted
+ - we can use this eg to kick tcp sessions
+ once done */
+#define NETDEV_CHANGE 0x0004 /* Notify device state change */
+#define NETDEV_REGISTER 0x0005
+#define NETDEV_UNREGISTER 0x0006
+#define NETDEV_CHANGEMTU 0x0007
+#define NETDEV_CHANGEADDR 0x0008
+#define NETDEV_GOING_DOWN 0x0009
+#define NETDEV_CHANGENAME 0x000A
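
A subsystem watches interface state by chaining a notifier_block onto the
netdevice chain and switching on the event codes above; the callback returns
one of the NOTIFY_* values. A minimal sketch, with netdev_chain standing in
for the real chain head (which lives in the net core, not this header):

    static int my_netdev_event(struct notifier_block *self,
                               unsigned long event, void *ptr)
    {
        struct net_device *dev = (struct net_device *)ptr;

        if (event == NETDEV_UP)
            printk("%s is up\n", dev->name);
        return NOTIFY_DONE;
    }

    static struct notifier_block my_nb = {
        notifier_call: my_netdev_event,
        priority:      0,
    };

    /* notifier_chain_register(&netdev_chain, &my_nb); */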
+
+#define SYS_DOWN 0x0001 /* Notify of system down */
+#define SYS_RESTART SYS_DOWN
+#define SYS_HALT 0x0002 /* Notify of system halt */
+#define SYS_POWER_OFF 0x0003 /* Notify of system power off */
+
+#define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_NOTIFIER_H */
diff --git a/xen/include/xeno/pci.h b/xen/include/xeno/pci.h
new file mode 100644
index 0000000000..33e612491e
--- /dev/null
+++ b/xen/include/xeno/pci.h
@@ -0,0 +1,807 @@
+/*
+ * $Id: pci.h,v 1.87 1998/10/11 15:13:12 mj Exp $
+ *
+ * PCI defines and function prototypes
+ * Copyright 1994, Drew Eckhardt
+ * Copyright 1997--1999 Martin Mares <mj@ucw.cz>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ */
+
+#ifndef LINUX_PCI_H
+#define LINUX_PCI_H
+
+/*
+ * Under PCI, each device has 256 bytes of configuration address space,
+ * of which the first 64 bytes are standardized as follows:
+ */
+#define PCI_VENDOR_ID 0x00 /* 16 bits */
+#define PCI_DEVICE_ID 0x02 /* 16 bits */
+#define PCI_COMMAND 0x04 /* 16 bits */
+#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */
+#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */
+#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */
+#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */
+#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */
+#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */
+#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */
+#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */
+#define PCI_COMMAND_SERR 0x100 /* Enable SERR */
+#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */
+
+#define PCI_STATUS 0x06 /* 16 bits */
+#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */
+#define PCI_STATUS_66MHZ 0x20 /* Support 66 MHz PCI 2.1 bus */
+#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */
+#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast back-to-back */
+#define PCI_STATUS_PARITY 0x100 /* Detected parity error */
+#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */
+#define PCI_STATUS_DEVSEL_FAST 0x000
+#define PCI_STATUS_DEVSEL_MEDIUM 0x200
+#define PCI_STATUS_DEVSEL_SLOW 0x400
+#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */
+#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of target abort */
+#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */
+#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */
+#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */
+
+#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8
+ revision */
+#define PCI_REVISION_ID 0x08 /* Revision ID */
+#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */
+#define PCI_CLASS_DEVICE 0x0a /* Device class */
+
+#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */
+#define PCI_LATENCY_TIMER 0x0d /* 8 bits */
+#define PCI_HEADER_TYPE 0x0e /* 8 bits */
+#define PCI_HEADER_TYPE_NORMAL 0
+#define PCI_HEADER_TYPE_BRIDGE 1
+#define PCI_HEADER_TYPE_CARDBUS 2
+
+#define PCI_BIST 0x0f /* 8 bits */
+#define PCI_BIST_CODE_MASK 0x0f /* Return result */
+#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */
+#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */
+
+/*
+ * Base addresses specify locations in memory or I/O space.
+ * Decoded size can be determined by writing a value of
+ * 0xffffffff to the register, and reading it back. Only the
+ * bits that read back as 1 are decoded.
+ */
+#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */
+#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */
+#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */
+#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */
+#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */
+#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */
+#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */
+#define PCI_BASE_ADDRESS_SPACE_IO 0x01
+#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00
+#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06
+#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */
+#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */
+#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */
+#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */
+#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL)
+#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL)
+/* bit 1 is reserved if address_space = 1 */
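
The sizing protocol described above, in code: write all-ones, read back,
restore, then mask off the flag bits; the size is the two's complement of what
remains. A sketch using the config accessors declared later in this header:

    u32 saved, mask, size;

    pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &saved);
    pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, 0xffffffff);
    pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &mask);
    pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, saved);    /* restore */

    if (saved & PCI_BASE_ADDRESS_SPACE_IO)
        mask &= PCI_BASE_ADDRESS_IO_MASK;
    else
        mask &= PCI_BASE_ADDRESS_MEM_MASK;
    size = ~mask + 1;   /* only the 1-bits are decoded */

So a device that answers 0xfffff000 in a memory BAR decodes 4KB.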
+
+/* Header type 0 (normal devices) */
+#define PCI_CARDBUS_CIS 0x28
+#define PCI_SUBSYSTEM_VENDOR_ID 0x2c
+#define PCI_SUBSYSTEM_ID 0x2e
+#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */
+#define PCI_ROM_ADDRESS_ENABLE 0x01
+#define PCI_ROM_ADDRESS_MASK (~0x7ffUL)
+
+#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */
+
+/* 0x35-0x3b are reserved */
+#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */
+#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */
+#define PCI_MIN_GNT 0x3e /* 8 bits */
+#define PCI_MAX_LAT 0x3f /* 8 bits */
+
+/* Header type 1 (PCI-to-PCI bridges) */
+#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */
+#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */
+#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */
+#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */
+#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */
+#define PCI_IO_LIMIT 0x1d
+#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */
+#define PCI_IO_RANGE_TYPE_16 0x00
+#define PCI_IO_RANGE_TYPE_32 0x01
+#define PCI_IO_RANGE_MASK (~0x0fUL)
+#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */
+#define PCI_MEMORY_BASE 0x20 /* Memory range behind */
+#define PCI_MEMORY_LIMIT 0x22
+#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
+#define PCI_MEMORY_RANGE_MASK (~0x0fUL)
+#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */
+#define PCI_PREF_MEMORY_LIMIT 0x26
+#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL
+#define PCI_PREF_RANGE_TYPE_32 0x00
+#define PCI_PREF_RANGE_TYPE_64 0x01
+#define PCI_PREF_RANGE_MASK (~0x0fUL)
+#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */
+#define PCI_PREF_LIMIT_UPPER32 0x2c
+#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */
+#define PCI_IO_LIMIT_UPPER16 0x32
+/* 0x34 same as for htype 0 */
+/* 0x35-0x3b are reserved */
+#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_BRIDGE_CONTROL 0x3e
+#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */
+#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */
+#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */
+#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */
+#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */
+#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
+#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
+
+/* Header type 2 (CardBus bridges) */
+#define PCI_CB_CAPABILITY_LIST 0x14
+/* 0x15 reserved */
+#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */
+#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */
+#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */
+#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */
+#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */
+#define PCI_CB_MEMORY_BASE_0 0x1c
+#define PCI_CB_MEMORY_LIMIT_0 0x20
+#define PCI_CB_MEMORY_BASE_1 0x24
+#define PCI_CB_MEMORY_LIMIT_1 0x28
+#define PCI_CB_IO_BASE_0 0x2c
+#define PCI_CB_IO_BASE_0_HI 0x2e
+#define PCI_CB_IO_LIMIT_0 0x30
+#define PCI_CB_IO_LIMIT_0_HI 0x32
+#define PCI_CB_IO_BASE_1 0x34
+#define PCI_CB_IO_BASE_1_HI 0x36
+#define PCI_CB_IO_LIMIT_1 0x38
+#define PCI_CB_IO_LIMIT_1_HI 0x3a
+#define PCI_CB_IO_RANGE_MASK (~0x03UL)
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_CB_BRIDGE_CONTROL 0x3e
+#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */
+#define PCI_CB_BRIDGE_CTL_SERR 0x02
+#define PCI_CB_BRIDGE_CTL_ISA 0x04
+#define PCI_CB_BRIDGE_CTL_VGA 0x08
+#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20
+#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */
+#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */
+#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */
+#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
+#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400
+#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40
+#define PCI_CB_SUBSYSTEM_ID 0x42
+#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */
+/* 0x48-0x7f reserved */
+
+/* Capability lists */
+
+#define PCI_CAP_LIST_ID 0 /* Capability ID */
+#define PCI_CAP_ID_PM 0x01 /* Power Management */
+#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */
+#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */
+#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */
+#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */
+#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */
+#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */
+#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
+#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */
+#define PCI_CAP_SIZEOF 4
+
+/* Power Management Registers */
+
+#define PCI_PM_PMC 2 /* PM Capabilities Register */
+#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */
+#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */
+#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */
+#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */
+#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */
+#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */
+#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */
+#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */
+#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */
+#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */
+#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */
+#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */
+#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */
+#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */
+#define PCI_PM_CTRL 4 /* PM control and status register */
+#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
+#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
+#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
+#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
+#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */
+#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */
+#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */
+#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */
+#define PCI_PM_DATA_REGISTER 7 /* (??) */
+#define PCI_PM_SIZEOF 8
+
+/* AGP registers */
+
+#define PCI_AGP_VERSION 2 /* BCD version number */
+#define PCI_AGP_RFU 3 /* Rest of capability flags */
+#define PCI_AGP_STATUS 4 /* Status register */
+#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */
+#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */
+#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */
+#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */
+#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */
+#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */
+#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */
+#define PCI_AGP_COMMAND 8 /* Control register */
+#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */
+#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */
+#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */
+#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */
+#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */
+#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */
+#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */
+#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */
+#define PCI_AGP_SIZEOF 12
+
+/* Slot Identification */
+
+#define PCI_SID_ESR 2 /* Expansion Slot Register */
+#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */
+#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */
+#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */
+
+/* Message Signalled Interrupts registers */
+
+#define PCI_MSI_FLAGS 2 /* Various flags */
+#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */
+#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */
+#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */
+#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */
+#define PCI_MSI_RFU 3 /* Rest of capability flags */
+#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */
+#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
+#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */
+#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */
+
+/* CompactPCI Hotswap Register */
+
+#define PCI_CHSWP_CSR 2 /* Control and Status Register */
+#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */
+#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */
+#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */
+#define PCI_CHSWP_LOO 0x08 /* LED On / Off */
+#define PCI_CHSWP_PI 0x30 /* Programming Interface */
+#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */
+#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */
+
+/* PCI-X registers */
+
+#define PCI_X_CMD 2 /* Modes & Features */
+#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */
+#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */
+#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */
+#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */
+#define PCI_X_DEVFN 4 /* A copy of devfn. */
+#define PCI_X_BUSNR 5 /* Bus segment number */
+#define PCI_X_STATUS 6 /* PCI-X capabilities */
+#define PCI_X_STATUS_64BIT 0x0001 /* 64-bit device */
+#define PCI_X_STATUS_133MHZ 0x0002 /* 133 MHz capable */
+#define PCI_X_STATUS_SPL_DISC 0x0004 /* Split Completion Discarded */
+#define PCI_X_STATUS_UNX_SPL 0x0008 /* Unexpected Split Completion */
+#define PCI_X_STATUS_COMPLEX 0x0010 /* Device Complexity */
+#define PCI_X_STATUS_MAX_READ 0x0060 /* Designed Maximum Memory Read Count */
+#define PCI_X_STATUS_MAX_SPLIT 0x0380 /* Design Max Outstanding Split Trans */
+#define PCI_X_STATUS_MAX_CUM 0x1c00 /* Designed Max Cumulative Read Size */
+#define PCI_X_STATUS_SPL_ERR 0x2000 /* Rcvd Split Completion Error Msg */
+
+/* Include the ID list */
+
+#include <linux/pci_ids.h>
+
+/*
+ * The PCI interface treats multi-function devices as independent
+ * devices. The slot/function address of each device is encoded
+ * in a single byte as follows:
+ *
+ * 7:3 = slot
+ * 2:0 = function
+ */
+#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
+#define PCI_FUNC(devfn) ((devfn) & 0x07)
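
A worked example, slot 3 / function 1:

    unsigned int devfn = PCI_DEVFN(3, 1);  /* ((3 & 0x1f) << 3) | (1 & 0x07) = 0x19 */

    PCI_SLOT(devfn);   /* (0x19 >> 3) & 0x1f = 3 */
    PCI_FUNC(devfn);   /*  0x19 & 0x07       = 1 */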
+
+/* Ioctls for /proc/bus/pci/X/Y nodes. */
+#define PCIIOC_BASE ('P' << 24 | 'C' << 16 | 'I' << 8)
+#define PCIIOC_CONTROLLER (PCIIOC_BASE | 0x00) /* Get controller for PCI device. */
+#define PCIIOC_MMAP_IS_IO (PCIIOC_BASE | 0x01) /* Set mmap state to I/O space. */
+#define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */
+#define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. */
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/config.h>
+#include <linux/ioport.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+
+/* File state for mmap()s on /proc/bus/pci/X/Y */
+enum pci_mmap_state {
+ pci_mmap_io,
+ pci_mmap_mem
+};
+
+/* This defines the direction arg to the DMA mapping routines. */
+#define PCI_DMA_BIDIRECTIONAL 0
+#define PCI_DMA_TODEVICE 1
+#define PCI_DMA_FROMDEVICE 2
+#define PCI_DMA_NONE 3
+
+#define DEVICE_COUNT_COMPATIBLE 4
+#define DEVICE_COUNT_IRQ 2
+#define DEVICE_COUNT_DMA 2
+#define DEVICE_COUNT_RESOURCE 12
+
+#define PCI_ANY_ID (~0)
+
+#define pci_present pcibios_present
+
+
+#define pci_for_each_dev_reverse(dev) \
+ for(dev = pci_dev_g(pci_devices.prev); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.prev))
+
+#define pci_for_each_bus(bus) \
+ for(bus = pci_bus_b(pci_root_buses.next); bus != pci_bus_b(&pci_root_buses); bus = pci_bus_b(bus->node.next))
+
+/*
+ * The pci_dev structure is used to describe both PCI and ISAPnP devices.
+ */
+struct pci_dev {
+ struct list_head global_list; /* node in list of all PCI devices */
+ struct list_head bus_list; /* node in per-bus list */
+ struct pci_bus *bus; /* bus this device is on */
+ struct pci_bus *subordinate; /* bus this device bridges to */
+
+ void *sysdata; /* hook for sys-specific extension */
+ struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
+
+ unsigned int devfn; /* encoded device & function index */
+ unsigned short vendor;
+ unsigned short device;
+ unsigned short subsystem_vendor;
+ unsigned short subsystem_device;
+ unsigned int class; /* 3 bytes: (base,sub,prog-if) */
+ u8 hdr_type; /* PCI header type (`multi' flag masked out) */
+ u8 rom_base_reg; /* which config register controls the ROM */
+
+ struct pci_driver *driver; /* which driver has allocated this device */
+ void *driver_data; /* data private to the driver */
+ u64 dma_mask; /* Mask of the bits of bus address this
+ device implements. Normally this is
+ 0xffffffff. You only need to change
+ this if your device has broken DMA
+ or supports 64-bit transfers. */
+
+ u32 current_state; /* Current operating state. In ACPI-speak,
+ this is D0-D3, D0 being fully functional,
+ and D3 being off. */
+
+ /* device is compatible with these IDs */
+ unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
+ unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
+
+ /*
+ * Instead of touching interrupt line and base address registers
+ * directly, use the values stored here. They might be different!
+ */
+ unsigned int irq;
+ struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+ struct resource dma_resource[DEVICE_COUNT_DMA];
+ struct resource irq_resource[DEVICE_COUNT_IRQ];
+
+ char name[90]; /* device name */
+ char slot_name[8]; /* slot name */
+ int active; /* ISAPnP: device is active */
+ int ro; /* ISAPnP: read only */
+ unsigned short regs; /* ISAPnP: supported registers */
+
+ /* These fields are used by common fixups */
+ unsigned short transparent:1; /* Transparent PCI bridge */
+
+ int (*prepare)(struct pci_dev *dev); /* ISAPnP hooks */
+ int (*activate)(struct pci_dev *dev);
+ int (*deactivate)(struct pci_dev *dev);
+};
+
+#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
+#define pci_dev_b(n) list_entry(n, struct pci_dev, bus_list)
+
+/*
+ * For PCI devices, the region numbers are assigned this way:
+ *
+ * 0-5 standard PCI regions
+ * 6 expansion ROM
+ * 7-10 bridges: address space assigned to buses behind the bridge
+ */
+
+#define PCI_ROM_RESOURCE 6
+#define PCI_BRIDGE_RESOURCES 7
+#define PCI_NUM_RESOURCES 11
+
+#define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */
+
+struct pci_bus {
+ struct list_head node; /* node in list of buses */
+ struct pci_bus *parent; /* parent bus this bridge is on */
+ struct list_head children; /* list of child buses */
+ struct list_head devices; /* list of devices on this bus */
+ struct pci_dev *self; /* bridge device as seen by parent */
+ struct resource *resource[4]; /* address space routed to this bus */
+
+ struct pci_ops *ops; /* configuration access functions */
+ void *sysdata; /* hook for sys-specific extension */
+ struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */
+
+ unsigned char number; /* bus number */
+ unsigned char primary; /* number of primary bridge */
+ unsigned char secondary; /* number of secondary bridge */
+ unsigned char subordinate; /* max number of subordinate buses */
+
+ char name[48];
+ unsigned short vendor;
+ unsigned short device;
+ unsigned int serial; /* serial number */
+ unsigned char pnpver; /* Plug & Play version */
+ unsigned char productver; /* product version */
+ unsigned char checksum; /* if zero - checksum passed */
+ unsigned char pad1;
+};
+
+#define pci_bus_b(n) list_entry(n, struct pci_bus, node)
+
+extern struct list_head pci_root_buses; /* list of all known PCI buses */
+extern struct list_head pci_devices; /* list of all devices */
+
+extern struct proc_dir_entry *proc_bus_pci_dir;
+/*
+ * Error values that may be returned by PCI functions.
+ */
+#define PCIBIOS_SUCCESSFUL 0x00
+#define PCIBIOS_FUNC_NOT_SUPPORTED 0x81
+#define PCIBIOS_BAD_VENDOR_ID 0x83
+#define PCIBIOS_DEVICE_NOT_FOUND 0x86
+#define PCIBIOS_BAD_REGISTER_NUMBER 0x87
+#define PCIBIOS_SET_FAILED 0x88
+#define PCIBIOS_BUFFER_TOO_SMALL 0x89
+
+/* Low-level architecture-dependent routines */
+
+struct pci_ops {
+ int (*read_byte)(struct pci_dev *, int where, u8 *val);
+ int (*read_word)(struct pci_dev *, int where, u16 *val);
+ int (*read_dword)(struct pci_dev *, int where, u32 *val);
+ int (*write_byte)(struct pci_dev *, int where, u8 val);
+ int (*write_word)(struct pci_dev *, int where, u16 val);
+ int (*write_dword)(struct pci_dev *, int where, u32 val);
+};
+
+struct pbus_set_ranges_data
+{
+ unsigned long io_start, io_end;
+ unsigned long mem_start, mem_end;
+ unsigned long prefetch_start, prefetch_end;
+};
+
+struct pci_device_id {
+ unsigned int vendor, device; /* Vendor and device ID or PCI_ANY_ID */
+ unsigned int subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */
+ unsigned int class, class_mask; /* (class,subclass,prog-if) triplet */
+ unsigned long driver_data; /* Data private to the driver */
+};
+
+struct pci_driver {
+ struct list_head node;
+ char *name;
+ const struct pci_device_id *id_table; /* NULL if wants all devices */
+ int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */
+ void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */
+ int (*save_state) (struct pci_dev *dev, u32 state); /* Save Device Context */
+ int (*suspend) (struct pci_dev *dev, u32 state); /* Device suspended */
+ int (*resume) (struct pci_dev *dev); /* Device woken up */
+ int (*enable_wake) (struct pci_dev *dev, u32 state, int enable); /* Enable wake event */
+};
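
A driver fills in a match table and one of these, then registers it; probe()
runs for every device whose IDs match. A minimal sketch in the 2.4
GNU-initializer style, with hypothetical my_probe/my_remove (see the
pci_set_drvdata() example further below) and an NCR controller as the match:

    static struct pci_device_id my_ids[] = {
        { PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810,
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
        { 0, }   /* terminator */
    };

    static struct pci_driver my_driver = {
        name:     "mydrv",
        id_table: my_ids,
        probe:    my_probe,
        remove:   my_remove,
    };

    /* registered with pci_module_init(&my_driver) -- see the helper below */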
+
+
+/* these external functions are only available when PCI support is enabled */
+#ifdef CONFIG_PCI
+
+#define pci_for_each_dev(dev) \
+ for(dev = pci_dev_g(pci_devices.next); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.next))
+
+void pcibios_init(void);
+void pcibios_fixup_bus(struct pci_bus *);
+int pcibios_enable_device(struct pci_dev *, int mask);
+char *pcibios_setup (char *str);
+
+/* Used only when drivers/pci/setup.c is used */
+void pcibios_align_resource(void *, struct resource *,
+ unsigned long, unsigned long);
+void pcibios_update_resource(struct pci_dev *, struct resource *,
+ struct resource *, int);
+void pcibios_update_irq(struct pci_dev *, int irq);
+void pcibios_fixup_pbus_ranges(struct pci_bus *, struct pbus_set_ranges_data *);
+
+/* Backward compatibility, don't use in new code! */
+
+int pcibios_present(void);
+int pcibios_read_config_byte (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned char *val);
+int pcibios_read_config_word (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned short *val);
+int pcibios_read_config_dword (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned int *val);
+int pcibios_write_config_byte (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned char val);
+int pcibios_write_config_word (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned short val);
+int pcibios_write_config_dword (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned int val);
+int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn);
+int pcibios_find_device (unsigned short vendor, unsigned short dev_id,
+ unsigned short index, unsigned char *bus,
+ unsigned char *dev_fn);
+
+/* Generic PCI functions used internally */
+
+void pci_init(void);
+int pci_bus_exists(const struct list_head *list, int nr);
+struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
+struct pci_bus *pci_alloc_primary_bus(int bus);
+struct pci_dev *pci_scan_slot(struct pci_dev *temp);
+int pci_proc_attach_device(struct pci_dev *dev);
+int pci_proc_detach_device(struct pci_dev *dev);
+int pci_proc_attach_bus(struct pci_bus *bus);
+int pci_proc_detach_bus(struct pci_bus *bus);
+void pci_name_device(struct pci_dev *dev);
+char *pci_class_name(u32 class);
+void pci_read_bridge_bases(struct pci_bus *child);
+struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res);
+int pci_setup_device(struct pci_dev *dev);
+int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
+
+/* Generic PCI functions exported to card drivers */
+
+struct pci_dev *pci_find_device (unsigned int vendor, unsigned int device, const struct pci_dev *from);
+struct pci_dev *pci_find_subsys (unsigned int vendor, unsigned int device,
+ unsigned int ss_vendor, unsigned int ss_device,
+ const struct pci_dev *from);
+struct pci_dev *pci_find_class (unsigned int class, const struct pci_dev *from);
+struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn);
+int pci_find_capability (struct pci_dev *dev, int cap);
+
+int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val);
+int pci_read_config_word(struct pci_dev *dev, int where, u16 *val);
+int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val);
+int pci_write_config_byte(struct pci_dev *dev, int where, u8 val);
+int pci_write_config_word(struct pci_dev *dev, int where, u16 val);
+int pci_write_config_dword(struct pci_dev *dev, int where, u32 val);
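
These wrappers route through the owning bus's pci_ops and return PCIBIOS_*
codes. A typical use pairs them with pci_find_capability(): locate the
power-management capability, then read its control/status register:

    int pos;
    u16 ctrl, state;

    pos = pci_find_capability(dev, PCI_CAP_ID_PM);
    if (pos != 0 &&
        pci_read_config_word(dev, pos + PCI_PM_CTRL, &ctrl) == PCIBIOS_SUCCESSFUL)
        state = ctrl & PCI_PM_CTRL_STATE_MASK;   /* current D-state, D0..D3 */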
+
+int pci_enable_device(struct pci_dev *dev);
+int pci_enable_device_bars(struct pci_dev *dev, int mask);
+void pci_disable_device(struct pci_dev *dev);
+void pci_set_master(struct pci_dev *dev);
+#define HAVE_PCI_SET_MWI
+int pci_set_mwi(struct pci_dev *dev);
+void pci_clear_mwi(struct pci_dev *dev);
+int pdev_set_mwi(struct pci_dev *dev);
+int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_assign_resource(struct pci_dev *dev, int i);
+
+/* Power management related routines */
+int pci_save_state(struct pci_dev *dev, u32 *buffer);
+int pci_restore_state(struct pci_dev *dev, u32 *buffer);
+int pci_set_power_state(struct pci_dev *dev, int state);
+int pci_enable_wake(struct pci_dev *dev, u32 state, int enable);
+
+/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+
+int pci_claim_resource(struct pci_dev *, int);
+void pci_assign_unassigned_resources(void);
+void pdev_enable_device(struct pci_dev *);
+void pdev_sort_resources(struct pci_dev *, struct resource_list *);
+unsigned long pci_bridge_check_io(struct pci_dev *);
+void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
+ int (*)(struct pci_dev *, u8, u8));
+#define HAVE_PCI_REQ_REGIONS 2
+int pci_request_regions(struct pci_dev *, char *);
+void pci_release_regions(struct pci_dev *);
+int pci_request_region(struct pci_dev *, int, char *);
+void pci_release_region(struct pci_dev *, int);
+
+/* New-style probing supporting hot-pluggable devices */
+int pci_register_driver(struct pci_driver *);
+void pci_unregister_driver(struct pci_driver *);
+void pci_insert_device(struct pci_dev *, struct pci_bus *);
+void pci_remove_device(struct pci_dev *);
+struct pci_driver *pci_dev_driver(const struct pci_dev *);
+const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev);
+void pci_announce_device_to_drivers(struct pci_dev *);
+unsigned int pci_do_scan_bus(struct pci_bus *bus);
+struct pci_bus * pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr);
+
+#if 0
+/* kmem_cache style wrapper around pci_alloc_consistent() */
+struct pci_pool *pci_pool_create (const char *name, struct pci_dev *dev,
+ size_t size, size_t align, size_t allocation, int flags);
+void pci_pool_destroy (struct pci_pool *pool);
+
+void *pci_pool_alloc (struct pci_pool *pool, int flags, dma_addr_t *handle);
+void pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t addr);
+#endif
+
+#endif /* CONFIG_PCI */
+
+/* Include architecture-dependent settings and functions */
+
+#include <asm/pci.h>
+
+/*
+ * If the system does not have PCI, clearly these return errors. Define
+ * these as simple inline functions to avoid hair in drivers.
+ */
+
+#ifndef CONFIG_PCI
+static inline int pcibios_present(void) { return 0; }
+static inline int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn)
+{ return PCIBIOS_DEVICE_NOT_FOUND; }
+
+#define _PCI_NOP(o,s,t) \
+ static inline int pcibios_##o##_config_##s (u8 bus, u8 dfn, u8 where, t val) \
+ { return PCIBIOS_FUNC_NOT_SUPPORTED; } \
+ static inline int pci_##o##_config_##s (struct pci_dev *dev, int where, t val) \
+ { return PCIBIOS_FUNC_NOT_SUPPORTED; }
+#define _PCI_NOP_ALL(o,x) _PCI_NOP(o,byte,u8 x) \
+ _PCI_NOP(o,word,u16 x) \
+ _PCI_NOP(o,dword,u32 x)
+_PCI_NOP_ALL(read, *)
+_PCI_NOP_ALL(write,)
+
+static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_class(unsigned int class, const struct pci_dev *from)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_subsys(unsigned int vendor, unsigned int device,
+unsigned int ss_vendor, unsigned int ss_device, const struct pci_dev *from)
+{ return NULL; }
+
+static inline void pci_set_master(struct pci_dev *dev) { }
+static inline int pci_enable_device_bars(struct pci_dev *dev, int mask) { return -EBUSY; }
+static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; }
+static inline void pci_disable_device(struct pci_dev *dev) { }
+static inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; }
+static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
+static inline int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
+static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;}
+static inline int pci_register_driver(struct pci_driver *drv) { return 0;}
+static inline void pci_unregister_driver(struct pci_driver *drv) { }
+static inline int scsi_to_pci_dma_dir(unsigned char scsi_dir) { return scsi_dir; }
+static inline int pci_find_capability (struct pci_dev *dev, int cap) {return 0; }
+static inline const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) { return NULL; }
+
+/* Power management related routines */
+static inline int pci_save_state(struct pci_dev *dev, u32 *buffer) { return 0; }
+static inline int pci_restore_state(struct pci_dev *dev, u32 *buffer) { return 0; }
+static inline int pci_set_power_state(struct pci_dev *dev, int state) { return 0; }
+static inline int pci_enable_wake(struct pci_dev *dev, u32 state, int enable) { return 0; }
+
+#define pci_for_each_dev(dev) \
+ for(dev = NULL; 0; )
+
+#else
+
+/*
+ * a helper function that ensures correct pci_driver
+ * setup and cleanup for commonly-encountered hotplug/modular cases
+ *
+ * This MUST stay in a header, as it checks for -DMODULE
+ */
+static inline int pci_module_init(struct pci_driver *drv)
+{
+ int rc = pci_register_driver (drv);
+
+ if (rc > 0)
+ return 0;
+
+ /* iff CONFIG_HOTPLUG and built into kernel, we should
+ * leave the driver around for future hotplug events.
+ * For the module case, a hotplug daemon of some sort
+ * should load a module in response to an insert event. */
+#if defined(CONFIG_HOTPLUG) && !defined(MODULE)
+ if (rc == 0)
+ return 0;
+#else
+ if (rc == 0)
+ rc = -ENODEV;
+#endif
+
+ /* if we get here, we need to clean up pci driver instance
+ * and return some sort of error */
+ pci_unregister_driver (drv);
+
+ return rc;
+}
+
+#endif /* !CONFIG_PCI */
+
+/* these helpers provide future and backwards compatibility
+ * for accessing popular PCI BAR info */
+#define pci_resource_start(dev,bar) ((dev)->resource[(bar)].start)
+#define pci_resource_end(dev,bar) ((dev)->resource[(bar)].end)
+#define pci_resource_flags(dev,bar) ((dev)->resource[(bar)].flags)
+#define pci_resource_len(dev,bar) \
+ ((pci_resource_start((dev),(bar)) == 0 && \
+ pci_resource_end((dev),(bar)) == \
+ pci_resource_start((dev),(bar))) ? 0 : \
+ \
+ (pci_resource_end((dev),(bar)) - \
+ pci_resource_start((dev),(bar)) + 1))
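
Claiming and mapping a BAR then looks like the sketch below, assuming a memory
BAR and the ioremap() this tree provides (xen/arch/i386/ioremap.c):

    void *regs = NULL;

    if (pci_request_region(dev, 0, "mydrv") == 0) {
        if (!(pci_resource_flags(dev, 0) & PCI_BASE_ADDRESS_SPACE_IO))
            regs = ioremap(pci_resource_start(dev, 0),
                           pci_resource_len(dev, 0));
        /* ... program the device through regs, or use I/O ports ... */
    }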
+
+/* Similar to the helpers above, these manipulate per-pci_dev
+ * driver-specific data. Currently stored as pci_dev::driver_data,
+ * a void pointer, but it is not present on older kernels.
+ */
+static inline void *pci_get_drvdata (struct pci_dev *pdev)
+{
+ return pdev->driver_data;
+}
+
+static inline void pci_set_drvdata (struct pci_dev *pdev, void *data)
+{
+ pdev->driver_data = data;
+}
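
The usual pattern ties the driver's private state to the device in probe() and
retrieves it in remove(). A sketch, with struct my_priv hypothetical:

    static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
    {
        struct my_priv *priv = kmalloc(sizeof(*priv), GFP_KERNEL);

        if (priv == NULL)
            return -ENOMEM;
        pci_set_drvdata(pdev, priv);
        return 0;
    }

    static void my_remove(struct pci_dev *pdev)
    {
        struct my_priv *priv = pci_get_drvdata(pdev);

        kfree(priv);
        pci_set_drvdata(pdev, NULL);
    }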
+
+/*
+ * The world is not perfect and supplies us with broken PCI devices.
+ * For at least a part of these bugs we need a work-around, so both
+ * generic (drivers/pci/quirks.c) and per-architecture code can define
+ * fixup hooks to be called for particular buggy devices.
+ */
+
+struct pci_fixup {
+ int pass;
+ u16 vendor, device; /* You can use PCI_ANY_ID here of course */
+ void (*hook)(struct pci_dev *dev);
+};
+
+extern struct pci_fixup pcibios_fixups[];
+
+#define PCI_FIXUP_HEADER 1 /* Called immediately after reading configuration header */
+#define PCI_FIXUP_FINAL 2 /* Final phase of device fixups */
+
+void pci_fixup_device(int pass, struct pci_dev *dev);
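
An architecture's fixup table is just an array of these, terminated by a
zeroed entry, with one hook per buggy device and a pass saying when to run it.
A sketch with a hypothetical quirk (PCI_ANY_ID matches everything, as the
comment above says):

    static void quirk_example(struct pci_dev *dev)
    {
        /* e.g. repair a bogus resource or class code here */
    }

    struct pci_fixup pcibios_fixups[] = {
        { PCI_FIXUP_HEADER, PCI_VENDOR_ID_COMPAQ, PCI_ANY_ID, quirk_example },
        { 0 }
    };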
+
+extern int pci_pci_problems;
+#define PCIPCI_FAIL 1
+#define PCIPCI_TRITON 2
+#define PCIPCI_NATOMA 4
+#define PCIPCI_VIAETBF 8
+#define PCIPCI_VSFX 16
+#define PCIPCI_ALIMAGIK 32
+
+#endif /* __KERNEL__ */
+#endif /* LINUX_PCI_H */
diff --git a/xen/include/xeno/pci_ids.h b/xen/include/xeno/pci_ids.h
new file mode 100644
index 0000000000..3220beb90c
--- /dev/null
+++ b/xen/include/xeno/pci_ids.h
@@ -0,0 +1,1856 @@
+/*
+ * PCI Class, Vendor and Device IDs
+ *
+ * Please keep sorted.
+ */
+
+/* Device classes and subclasses */
+
+#define PCI_CLASS_NOT_DEFINED 0x0000
+#define PCI_CLASS_NOT_DEFINED_VGA 0x0001
+
+#define PCI_BASE_CLASS_STORAGE 0x01
+#define PCI_CLASS_STORAGE_SCSI 0x0100
+#define PCI_CLASS_STORAGE_IDE 0x0101
+#define PCI_CLASS_STORAGE_FLOPPY 0x0102
+#define PCI_CLASS_STORAGE_IPI 0x0103
+#define PCI_CLASS_STORAGE_RAID 0x0104
+#define PCI_CLASS_STORAGE_OTHER 0x0180
+
+#define PCI_BASE_CLASS_NETWORK 0x02
+#define PCI_CLASS_NETWORK_ETHERNET 0x0200
+#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201
+#define PCI_CLASS_NETWORK_FDDI 0x0202
+#define PCI_CLASS_NETWORK_ATM 0x0203
+#define PCI_CLASS_NETWORK_OTHER 0x0280
+
+#define PCI_BASE_CLASS_DISPLAY 0x03
+#define PCI_CLASS_DISPLAY_VGA 0x0300
+#define PCI_CLASS_DISPLAY_XGA 0x0301
+#define PCI_CLASS_DISPLAY_3D 0x0302
+#define PCI_CLASS_DISPLAY_OTHER 0x0380
+
+#define PCI_BASE_CLASS_MULTIMEDIA 0x04
+#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400
+#define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401
+#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402
+#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480
+
+#define PCI_BASE_CLASS_MEMORY 0x05
+#define PCI_CLASS_MEMORY_RAM 0x0500
+#define PCI_CLASS_MEMORY_FLASH 0x0501
+#define PCI_CLASS_MEMORY_OTHER 0x0580
+
+#define PCI_BASE_CLASS_BRIDGE 0x06
+#define PCI_CLASS_BRIDGE_HOST 0x0600
+#define PCI_CLASS_BRIDGE_ISA 0x0601
+#define PCI_CLASS_BRIDGE_EISA 0x0602
+#define PCI_CLASS_BRIDGE_MC 0x0603
+#define PCI_CLASS_BRIDGE_PCI 0x0604
+#define PCI_CLASS_BRIDGE_PCMCIA 0x0605
+#define PCI_CLASS_BRIDGE_NUBUS 0x0606
+#define PCI_CLASS_BRIDGE_CARDBUS 0x0607
+#define PCI_CLASS_BRIDGE_RACEWAY 0x0608
+#define PCI_CLASS_BRIDGE_OTHER 0x0680
+
+#define PCI_BASE_CLASS_COMMUNICATION 0x07
+#define PCI_CLASS_COMMUNICATION_SERIAL 0x0700
+#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
+#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702
+#define PCI_CLASS_COMMUNICATION_MODEM 0x0703
+#define PCI_CLASS_COMMUNICATION_OTHER 0x0780
+
+#define PCI_BASE_CLASS_SYSTEM 0x08
+#define PCI_CLASS_SYSTEM_PIC 0x0800
+#define PCI_CLASS_SYSTEM_DMA 0x0801
+#define PCI_CLASS_SYSTEM_TIMER 0x0802
+#define PCI_CLASS_SYSTEM_RTC 0x0803
+#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804
+#define PCI_CLASS_SYSTEM_OTHER 0x0880
+
+#define PCI_BASE_CLASS_INPUT 0x09
+#define PCI_CLASS_INPUT_KEYBOARD 0x0900
+#define PCI_CLASS_INPUT_PEN 0x0901
+#define PCI_CLASS_INPUT_MOUSE 0x0902
+#define PCI_CLASS_INPUT_SCANNER 0x0903
+#define PCI_CLASS_INPUT_GAMEPORT 0x0904
+#define PCI_CLASS_INPUT_OTHER 0x0980
+
+#define PCI_BASE_CLASS_DOCKING 0x0a
+#define PCI_CLASS_DOCKING_GENERIC 0x0a00
+#define PCI_CLASS_DOCKING_OTHER 0x0a80
+
+#define PCI_BASE_CLASS_PROCESSOR 0x0b
+#define PCI_CLASS_PROCESSOR_386 0x0b00
+#define PCI_CLASS_PROCESSOR_486 0x0b01
+#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02
+#define PCI_CLASS_PROCESSOR_ALPHA 0x0b10
+#define PCI_CLASS_PROCESSOR_POWERPC 0x0b20
+#define PCI_CLASS_PROCESSOR_MIPS 0x0b30
+#define PCI_CLASS_PROCESSOR_CO 0x0b40
+
+#define PCI_BASE_CLASS_SERIAL 0x0c
+#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00
+#define PCI_CLASS_SERIAL_ACCESS 0x0c01
+#define PCI_CLASS_SERIAL_SSA 0x0c02
+#define PCI_CLASS_SERIAL_USB 0x0c03
+#define PCI_CLASS_SERIAL_FIBER 0x0c04
+#define PCI_CLASS_SERIAL_SMBUS 0x0c05
+
+#define PCI_BASE_CLASS_INTELLIGENT 0x0e
+#define PCI_CLASS_INTELLIGENT_I2O 0x0e00
+
+#define PCI_BASE_CLASS_SATELLITE 0x0f
+#define PCI_CLASS_SATELLITE_TV 0x0f00
+#define PCI_CLASS_SATELLITE_AUDIO 0x0f01
+#define PCI_CLASS_SATELLITE_VOICE 0x0f03
+#define PCI_CLASS_SATELLITE_DATA 0x0f04
+
+#define PCI_BASE_CLASS_CRYPT 0x10
+#define PCI_CLASS_CRYPT_NETWORK 0x1000
+#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1001
+#define PCI_CLASS_CRYPT_OTHER 0x1080
+
+#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11
+#define PCI_CLASS_SP_DPIO 0x1100
+#define PCI_CLASS_SP_OTHER 0x1180
+
+#define PCI_CLASS_OTHERS 0xff
+
+/* Vendors and devices. Sort key: vendor first, device next. */
+
+#define PCI_VENDOR_ID_DYNALINK 0x0675
+#define PCI_DEVICE_ID_DYNALINK_IS64PH 0x1702
+
+#define PCI_VENDOR_ID_BERKOM 0x0871
+#define PCI_DEVICE_ID_BERKOM_A1T 0xffa1
+#define PCI_DEVICE_ID_BERKOM_T_CONCEPT 0xffa2
+#define PCI_DEVICE_ID_BERKOM_A4T 0xffa4
+#define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8
+
+#define PCI_VENDOR_ID_COMPAQ 0x0e11
+#define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508
+#define PCI_DEVICE_ID_COMPAQ_1280 0x3033
+#define PCI_DEVICE_ID_COMPAQ_TRIFLEX 0x4000
+#define PCI_DEVICE_ID_COMPAQ_6010 0x6010
+#define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc
+#define PCI_DEVICE_ID_COMPAQ_SMART2P 0xae10
+#define PCI_DEVICE_ID_COMPAQ_NETEL100 0xae32
+#define PCI_DEVICE_ID_COMPAQ_TRIFLEX_IDE 0xae33
+#define PCI_DEVICE_ID_COMPAQ_NETEL10 0xae34
+#define PCI_DEVICE_ID_COMPAQ_NETFLEX3I 0xae35
+#define PCI_DEVICE_ID_COMPAQ_NETEL100D 0xae40
+#define PCI_DEVICE_ID_COMPAQ_NETEL100PI 0xae43
+#define PCI_DEVICE_ID_COMPAQ_NETEL100I 0xb011
+#define PCI_DEVICE_ID_COMPAQ_CISS 0xb060
+#define PCI_DEVICE_ID_COMPAQ_CISSB 0xb178
+#define PCI_DEVICE_ID_COMPAQ_CISSC 0x0046
+#define PCI_DEVICE_ID_COMPAQ_THUNDER 0xf130
+#define PCI_DEVICE_ID_COMPAQ_NETFLEX3B 0xf150
+
+#define PCI_VENDOR_ID_NCR 0x1000
+#define PCI_VENDOR_ID_LSI_LOGIC 0x1000
+#define PCI_DEVICE_ID_NCR_53C810 0x0001
+#define PCI_DEVICE_ID_NCR_53C820 0x0002
+#define PCI_DEVICE_ID_NCR_53C825 0x0003
+#define PCI_DEVICE_ID_NCR_53C815 0x0004
+#define PCI_DEVICE_ID_LSI_53C810AP 0x0005
+#define PCI_DEVICE_ID_NCR_53C860 0x0006
+#define PCI_DEVICE_ID_LSI_53C1510 0x000a
+#define PCI_DEVICE_ID_NCR_53C896 0x000b
+#define PCI_DEVICE_ID_NCR_53C895 0x000c
+#define PCI_DEVICE_ID_NCR_53C885 0x000d
+#define PCI_DEVICE_ID_NCR_53C875 0x000f
+#define PCI_DEVICE_ID_NCR_53C1510 0x0010
+#define PCI_DEVICE_ID_LSI_53C895A 0x0012
+#define PCI_DEVICE_ID_LSI_53C875A 0x0013
+#define PCI_DEVICE_ID_LSI_53C1010_33 0x0020
+#define PCI_DEVICE_ID_LSI_53C1010_66 0x0021
+#define PCI_DEVICE_ID_LSI_53C1030 0x0030
+#define PCI_DEVICE_ID_LSI_53C1035 0x0040
+#define PCI_DEVICE_ID_NCR_53C875J 0x008f
+#define PCI_DEVICE_ID_LSI_FC909 0x0621
+#define PCI_DEVICE_ID_LSI_FC929 0x0622
+#define PCI_DEVICE_ID_LSI_FC929_LAN 0x0623
+#define PCI_DEVICE_ID_LSI_FC919 0x0624
+#define PCI_DEVICE_ID_LSI_FC919_LAN 0x0625
+#define PCI_DEVICE_ID_LSI_FC929X 0x0626
+#define PCI_DEVICE_ID_LSI_FC919X 0x0628
+#define PCI_DEVICE_ID_NCR_YELLOWFIN 0x0701
+#define PCI_DEVICE_ID_LSI_61C102 0x0901
+#define PCI_DEVICE_ID_LSI_63C815 0x1000
+
+#define PCI_VENDOR_ID_ATI 0x1002
+/* Mach64 */
+#define PCI_DEVICE_ID_ATI_68800 0x4158
+#define PCI_DEVICE_ID_ATI_215CT222 0x4354
+#define PCI_DEVICE_ID_ATI_210888CX 0x4358
+#define PCI_DEVICE_ID_ATI_215ET222 0x4554
+/* Mach64 / Rage */
+#define PCI_DEVICE_ID_ATI_215GB 0x4742
+#define PCI_DEVICE_ID_ATI_215GD 0x4744
+#define PCI_DEVICE_ID_ATI_215GI 0x4749
+#define PCI_DEVICE_ID_ATI_215GP 0x4750
+#define PCI_DEVICE_ID_ATI_215GQ 0x4751
+#define PCI_DEVICE_ID_ATI_215XL 0x4752
+#define PCI_DEVICE_ID_ATI_215GT 0x4754
+#define PCI_DEVICE_ID_ATI_215GTB 0x4755
+#define PCI_DEVICE_ID_ATI_215_IV 0x4756
+#define PCI_DEVICE_ID_ATI_215_IW 0x4757
+#define PCI_DEVICE_ID_ATI_215_IZ 0x475A
+#define PCI_DEVICE_ID_ATI_210888GX 0x4758
+#define PCI_DEVICE_ID_ATI_215_LB 0x4c42
+#define PCI_DEVICE_ID_ATI_215_LD 0x4c44
+#define PCI_DEVICE_ID_ATI_215_LG 0x4c47
+#define PCI_DEVICE_ID_ATI_215_LI 0x4c49
+#define PCI_DEVICE_ID_ATI_215_LM 0x4c4D
+#define PCI_DEVICE_ID_ATI_215_LN 0x4c4E
+#define PCI_DEVICE_ID_ATI_215_LR 0x4c52
+#define PCI_DEVICE_ID_ATI_215_LS 0x4c53
+#define PCI_DEVICE_ID_ATI_264_LT 0x4c54
+/* Mach64 VT */
+#define PCI_DEVICE_ID_ATI_264VT 0x5654
+#define PCI_DEVICE_ID_ATI_264VU 0x5655
+#define PCI_DEVICE_ID_ATI_264VV 0x5656
+/* Rage128 Pro GL */
+#define PCI_DEVICE_ID_ATI_Rage128_PA 0x5041
+#define PCI_DEVICE_ID_ATI_Rage128_PB 0x5042
+#define PCI_DEVICE_ID_ATI_Rage128_PC 0x5043
+#define PCI_DEVICE_ID_ATI_Rage128_PD 0x5044
+#define PCI_DEVICE_ID_ATI_Rage128_PE 0x5045
+#define PCI_DEVICE_ID_ATI_RAGE128_PF 0x5046
+/* Rage128 Pro VR */
+#define PCI_DEVICE_ID_ATI_RAGE128_PG 0x5047
+#define PCI_DEVICE_ID_ATI_RAGE128_PH 0x5048
+#define PCI_DEVICE_ID_ATI_RAGE128_PI 0x5049
+#define PCI_DEVICE_ID_ATI_RAGE128_PJ 0x504A
+#define PCI_DEVICE_ID_ATI_RAGE128_PK 0x504B
+#define PCI_DEVICE_ID_ATI_RAGE128_PL 0x504C
+#define PCI_DEVICE_ID_ATI_RAGE128_PM 0x504D
+#define PCI_DEVICE_ID_ATI_RAGE128_PN 0x504E
+#define PCI_DEVICE_ID_ATI_RAGE128_PO 0x504F
+#define PCI_DEVICE_ID_ATI_RAGE128_PP 0x5050
+#define PCI_DEVICE_ID_ATI_RAGE128_PQ 0x5051
+#define PCI_DEVICE_ID_ATI_RAGE128_PR 0x5052
+#define PCI_DEVICE_ID_ATI_RAGE128_TR 0x5452
+#define PCI_DEVICE_ID_ATI_RAGE128_PS 0x5053
+#define PCI_DEVICE_ID_ATI_RAGE128_PT 0x5054
+#define PCI_DEVICE_ID_ATI_RAGE128_PU 0x5055
+#define PCI_DEVICE_ID_ATI_RAGE128_PV 0x5056
+#define PCI_DEVICE_ID_ATI_RAGE128_PW 0x5057
+#define PCI_DEVICE_ID_ATI_RAGE128_PX 0x5058
+/* Rage128 GL */
+#define PCI_DEVICE_ID_ATI_RAGE128_RE 0x5245
+#define PCI_DEVICE_ID_ATI_RAGE128_RF 0x5246
+#define PCI_DEVICE_ID_ATI_RAGE128_RG 0x534b
+#define PCI_DEVICE_ID_ATI_RAGE128_RH 0x534c
+#define PCI_DEVICE_ID_ATI_RAGE128_RI 0x534d
+/* Rage128 VR */
+#define PCI_DEVICE_ID_ATI_RAGE128_RK 0x524b
+#define PCI_DEVICE_ID_ATI_RAGE128_RL 0x524c
+#define PCI_DEVICE_ID_ATI_RAGE128_RM 0x5345
+#define PCI_DEVICE_ID_ATI_RAGE128_RN 0x5346
+#define PCI_DEVICE_ID_ATI_RAGE128_RO 0x5347
+/* Rage128 M3 */
+#define PCI_DEVICE_ID_ATI_RAGE128_LE 0x4c45
+#define PCI_DEVICE_ID_ATI_RAGE128_LF 0x4c46
+/* Rage128 Pro Ultra */
+#define PCI_DEVICE_ID_ATI_RAGE128_U1 0x5446
+#define PCI_DEVICE_ID_ATI_RAGE128_U2 0x544C
+#define PCI_DEVICE_ID_ATI_RAGE128_U3 0x5452
+/* Radeon M4 */
+#define PCI_DEVICE_ID_ATI_RADEON_LE 0x4d45
+#define PCI_DEVICE_ID_ATI_RADEON_LF 0x4d46
+/* Radeon NV-100 */
+#define PCI_DEVICE_ID_ATI_RADEON_N1 0x5159
+#define PCI_DEVICE_ID_ATI_RADEON_N2 0x515a
+/* Radeon */
+#define PCI_DEVICE_ID_ATI_RADEON_RA 0x5144
+#define PCI_DEVICE_ID_ATI_RADEON_RB 0x5145
+#define PCI_DEVICE_ID_ATI_RADEON_RC 0x5146
+#define PCI_DEVICE_ID_ATI_RADEON_RD 0x5147
+/* RadeonIGP */
+#define PCI_DEVICE_ID_ATI_RADEON_IGP 0xCAB0
+
+#define PCI_VENDOR_ID_VLSI 0x1004
+#define PCI_DEVICE_ID_VLSI_82C592 0x0005
+#define PCI_DEVICE_ID_VLSI_82C593 0x0006
+#define PCI_DEVICE_ID_VLSI_82C594 0x0007
+#define PCI_DEVICE_ID_VLSI_82C597 0x0009
+#define PCI_DEVICE_ID_VLSI_82C541 0x000c
+#define PCI_DEVICE_ID_VLSI_82C543 0x000d
+#define PCI_DEVICE_ID_VLSI_82C532 0x0101
+#define PCI_DEVICE_ID_VLSI_82C534 0x0102
+#define PCI_DEVICE_ID_VLSI_82C535 0x0104
+#define PCI_DEVICE_ID_VLSI_82C147 0x0105
+#define PCI_DEVICE_ID_VLSI_VAS96011 0x0702
+
+#define PCI_VENDOR_ID_ADL 0x1005
+#define PCI_DEVICE_ID_ADL_2301 0x2301
+
+#define PCI_VENDOR_ID_NS 0x100b
+#define PCI_DEVICE_ID_NS_87415 0x0002
+#define PCI_DEVICE_ID_NS_87560_LIO 0x000e
+#define PCI_DEVICE_ID_NS_87560_USB 0x0012
+#define PCI_DEVICE_ID_NS_83815 0x0020
+#define PCI_DEVICE_ID_NS_83820 0x0022
+#define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500
+#define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501
+#define PCI_DEVICE_ID_NS_SCx200_IDE 0x0502
+#define PCI_DEVICE_ID_NS_SCx200_AUDIO 0x0503
+#define PCI_DEVICE_ID_NS_SCx200_VIDEO 0x0504
+#define PCI_DEVICE_ID_NS_SCx200_XBUS 0x0505
+#define PCI_DEVICE_ID_NS_87410 0xd001
+
+#define PCI_VENDOR_ID_TSENG 0x100c
+#define PCI_DEVICE_ID_TSENG_W32P_2 0x3202
+#define PCI_DEVICE_ID_TSENG_W32P_b 0x3205
+#define PCI_DEVICE_ID_TSENG_W32P_c 0x3206
+#define PCI_DEVICE_ID_TSENG_W32P_d 0x3207
+#define PCI_DEVICE_ID_TSENG_ET6000 0x3208
+
+#define PCI_VENDOR_ID_WEITEK 0x100e
+#define PCI_DEVICE_ID_WEITEK_P9000 0x9001
+#define PCI_DEVICE_ID_WEITEK_P9100 0x9100
+
+#define PCI_VENDOR_ID_DEC 0x1011
+#define PCI_DEVICE_ID_DEC_BRD 0x0001
+#define PCI_DEVICE_ID_DEC_TULIP 0x0002
+#define PCI_DEVICE_ID_DEC_TGA 0x0004
+#define PCI_DEVICE_ID_DEC_TULIP_FAST 0x0009
+#define PCI_DEVICE_ID_DEC_TGA2 0x000D
+#define PCI_DEVICE_ID_DEC_FDDI 0x000F
+#define PCI_DEVICE_ID_DEC_TULIP_PLUS 0x0014
+#define PCI_DEVICE_ID_DEC_21142 0x0019
+#define PCI_DEVICE_ID_DEC_21052 0x0021
+#define PCI_DEVICE_ID_DEC_21150 0x0022
+#define PCI_DEVICE_ID_DEC_21152 0x0024
+#define PCI_DEVICE_ID_DEC_21153 0x0025
+#define PCI_DEVICE_ID_DEC_21154 0x0026
+#define PCI_DEVICE_ID_DEC_21285 0x1065
+#define PCI_DEVICE_ID_COMPAQ_42XX 0x0046
+
+#define PCI_VENDOR_ID_CIRRUS 0x1013
+#define PCI_DEVICE_ID_CIRRUS_7548 0x0038
+#define PCI_DEVICE_ID_CIRRUS_5430 0x00a0
+#define PCI_DEVICE_ID_CIRRUS_5434_4 0x00a4
+#define PCI_DEVICE_ID_CIRRUS_5434_8 0x00a8
+#define PCI_DEVICE_ID_CIRRUS_5436 0x00ac
+#define PCI_DEVICE_ID_CIRRUS_5446 0x00b8
+#define PCI_DEVICE_ID_CIRRUS_5480 0x00bc
+#define PCI_DEVICE_ID_CIRRUS_5462 0x00d0
+#define PCI_DEVICE_ID_CIRRUS_5464 0x00d4
+#define PCI_DEVICE_ID_CIRRUS_5465 0x00d6
+#define PCI_DEVICE_ID_CIRRUS_6729 0x1100
+#define PCI_DEVICE_ID_CIRRUS_6832 0x1110
+#define PCI_DEVICE_ID_CIRRUS_7542 0x1200
+#define PCI_DEVICE_ID_CIRRUS_7543 0x1202
+#define PCI_DEVICE_ID_CIRRUS_7541 0x1204
+
+#define PCI_VENDOR_ID_IBM 0x1014
+#define PCI_DEVICE_ID_IBM_FIRE_CORAL 0x000a
+#define PCI_DEVICE_ID_IBM_TR 0x0018
+#define PCI_DEVICE_ID_IBM_82G2675 0x001d
+#define PCI_DEVICE_ID_IBM_MCA 0x0020
+#define PCI_DEVICE_ID_IBM_82351 0x0022
+#define PCI_DEVICE_ID_IBM_PYTHON 0x002d
+#define PCI_DEVICE_ID_IBM_SERVERAID 0x002e
+#define PCI_DEVICE_ID_IBM_TR_WAKE 0x003e
+#define PCI_DEVICE_ID_IBM_MPIC 0x0046
+#define PCI_DEVICE_ID_IBM_3780IDSP 0x007d
+#define PCI_DEVICE_ID_IBM_CHUKAR 0x0096
+#define PCI_DEVICE_ID_IBM_CPC710_PCI64 0x00fc
+#define PCI_DEVICE_ID_IBM_CPC710_PCI32 0x0105
+#define PCI_DEVICE_ID_IBM_405GP 0x0156
+#define PCI_DEVICE_ID_IBM_SERVERAIDI960 0x01bd
+#define PCI_DEVICE_ID_IBM_MPIC_2 0xffff
+
+#define PCI_VENDOR_ID_COMPEX2 0x101a // pci.ids says "AT&T GIS (NCR)"
+#define PCI_DEVICE_ID_COMPEX2_100VG 0x0005
+
+#define PCI_VENDOR_ID_WD 0x101c
+#define PCI_DEVICE_ID_WD_7197 0x3296
+#define PCI_DEVICE_ID_WD_90C 0xc24a
+
+#define PCI_VENDOR_ID_AMI 0x101e
+#define PCI_DEVICE_ID_AMI_MEGARAID3 0x1960
+#define PCI_DEVICE_ID_AMI_MEGARAID 0x9010
+#define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060
+
+#define PCI_VENDOR_ID_AMD 0x1022
+#define PCI_DEVICE_ID_AMD_LANCE 0x2000
+#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
+#define PCI_DEVICE_ID_AMD_SCSI 0x2020
+#define PCI_DEVICE_ID_AMD_FE_GATE_7006 0x7006
+#define PCI_DEVICE_ID_AMD_FE_GATE_7007 0x7007
+#define PCI_DEVICE_ID_AMD_FE_GATE_700C 0x700C
+#define PCI_DEVICE_ID_AMD_FE_GATE_700D 0x700D
+#define PCI_DEVICE_ID_AMD_FE_GATE_700E 0x700E
+#define PCI_DEVICE_ID_AMD_FE_GATE_700F 0x700F
+#define PCI_DEVICE_ID_AMD_COBRA_7400 0x7400
+#define PCI_DEVICE_ID_AMD_COBRA_7401 0x7401
+#define PCI_DEVICE_ID_AMD_COBRA_7403 0x7403
+#define PCI_DEVICE_ID_AMD_COBRA_7404 0x7404
+#define PCI_DEVICE_ID_AMD_VIPER_7408 0x7408
+#define PCI_DEVICE_ID_AMD_VIPER_7409 0x7409
+#define PCI_DEVICE_ID_AMD_VIPER_740B 0x740B
+#define PCI_DEVICE_ID_AMD_VIPER_740C 0x740C
+#define PCI_DEVICE_ID_AMD_VIPER_7410 0x7410
+#define PCI_DEVICE_ID_AMD_VIPER_7411 0x7411
+#define PCI_DEVICE_ID_AMD_VIPER_7413 0x7413
+#define PCI_DEVICE_ID_AMD_VIPER_7414 0x7414
+#define PCI_DEVICE_ID_AMD_OPUS_7440 0x7440
+# define PCI_DEVICE_ID_AMD_VIPER_7440 PCI_DEVICE_ID_AMD_OPUS_7440
+#define PCI_DEVICE_ID_AMD_OPUS_7441 0x7441
+# define PCI_DEVICE_ID_AMD_VIPER_7441 PCI_DEVICE_ID_AMD_OPUS_7441
+#define PCI_DEVICE_ID_AMD_OPUS_7443 0x7443
+# define PCI_DEVICE_ID_AMD_VIPER_7443 PCI_DEVICE_ID_AMD_OPUS_7443
+#define PCI_DEVICE_ID_AMD_OPUS_7448 0x7448
+# define PCI_DEVICE_ID_AMD_VIPER_7448 PCI_DEVICE_ID_AMD_OPUS_7448
+#define PCI_DEVICE_ID_AMD_OPUS_7449 0x7449
+# define PCI_DEVICE_ID_AMD_VIPER_7449 PCI_DEVICE_ID_AMD_OPUS_7449
+#define PCI_DEVICE_ID_AMD_8111_LAN 0x7462
+#define PCI_DEVICE_ID_AMD_8111_IDE 0x7469
+#define PCI_DEVICE_ID_AMD_8111_AC97 0x746d
+
+#define PCI_VENDOR_ID_TRIDENT 0x1023
+#define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX 0x2000
+#define PCI_DEVICE_ID_TRIDENT_4DWAVE_NX 0x2001
+#define PCI_DEVICE_ID_TRIDENT_9320 0x9320
+#define PCI_DEVICE_ID_TRIDENT_9388 0x9388
+#define PCI_DEVICE_ID_TRIDENT_9397 0x9397
+#define PCI_DEVICE_ID_TRIDENT_939A 0x939A
+#define PCI_DEVICE_ID_TRIDENT_9520 0x9520
+#define PCI_DEVICE_ID_TRIDENT_9525 0x9525
+#define PCI_DEVICE_ID_TRIDENT_9420 0x9420
+#define PCI_DEVICE_ID_TRIDENT_9440 0x9440
+#define PCI_DEVICE_ID_TRIDENT_9660 0x9660
+#define PCI_DEVICE_ID_TRIDENT_9750 0x9750
+#define PCI_DEVICE_ID_TRIDENT_9850 0x9850
+#define PCI_DEVICE_ID_TRIDENT_9880 0x9880
+#define PCI_DEVICE_ID_TRIDENT_8400 0x8400
+#define PCI_DEVICE_ID_TRIDENT_8420 0x8420
+#define PCI_DEVICE_ID_TRIDENT_8500 0x8500
+
+#define PCI_VENDOR_ID_AI 0x1025
+#define PCI_DEVICE_ID_AI_M1435 0x1435
+
+#define PCI_VENDOR_ID_DELL 0x1028
+
+#define PCI_VENDOR_ID_MATROX 0x102B
+#define PCI_DEVICE_ID_MATROX_MGA_2 0x0518
+#define PCI_DEVICE_ID_MATROX_MIL 0x0519
+#define PCI_DEVICE_ID_MATROX_MYS 0x051A
+#define PCI_DEVICE_ID_MATROX_MIL_2 0x051b
+#define PCI_DEVICE_ID_MATROX_MIL_2_AGP 0x051f
+#define PCI_DEVICE_ID_MATROX_MGA_IMP 0x0d10
+#define PCI_DEVICE_ID_MATROX_G100_MM 0x1000
+#define PCI_DEVICE_ID_MATROX_G100_AGP 0x1001
+#define PCI_DEVICE_ID_MATROX_G200_PCI 0x0520
+#define PCI_DEVICE_ID_MATROX_G200_AGP 0x0521
+#define PCI_DEVICE_ID_MATROX_G400 0x0525
+#define PCI_DEVICE_ID_MATROX_G550 0x2527
+#define PCI_DEVICE_ID_MATROX_VIA 0x4536
+
+#define PCI_VENDOR_ID_CT 0x102c
+#define PCI_DEVICE_ID_CT_65545 0x00d8
+#define PCI_DEVICE_ID_CT_65548 0x00dc
+#define PCI_DEVICE_ID_CT_65550 0x00e0
+#define PCI_DEVICE_ID_CT_65554 0x00e4
+#define PCI_DEVICE_ID_CT_65555 0x00e5
+
+#define PCI_VENDOR_ID_MIRO 0x1031
+#define PCI_DEVICE_ID_MIRO_36050 0x5601
+
+#define PCI_VENDOR_ID_NEC 0x1033
+#define PCI_DEVICE_ID_NEC_PCX2 0x0046
+#define PCI_DEVICE_ID_NEC_NILE4 0x005a
+#define PCI_DEVICE_ID_NEC_VRC5476 0x009b
+#define PCI_DEVICE_ID_NEC_VRC5477_AC97 0x00a6
+
+#define PCI_VENDOR_ID_FD 0x1036
+#define PCI_DEVICE_ID_FD_36C70 0x0000
+
+#define PCI_VENDOR_ID_SI 0x1039
+#define PCI_DEVICE_ID_SI_5591_AGP 0x0001
+#define PCI_DEVICE_ID_SI_6202 0x0002
+#define PCI_DEVICE_ID_SI_503 0x0008
+#define PCI_DEVICE_ID_SI_ACPI 0x0009
+#define PCI_DEVICE_ID_SI_5597_VGA 0x0200
+#define PCI_DEVICE_ID_SI_6205 0x0205
+#define PCI_DEVICE_ID_SI_501 0x0406
+#define PCI_DEVICE_ID_SI_496 0x0496
+#define PCI_DEVICE_ID_SI_300 0x0300
+#define PCI_DEVICE_ID_SI_315H 0x0310
+#define PCI_DEVICE_ID_SI_315 0x0315
+#define PCI_DEVICE_ID_SI_315PRO 0x0325
+#define PCI_DEVICE_ID_SI_530 0x0530
+#define PCI_DEVICE_ID_SI_540 0x0540
+#define PCI_DEVICE_ID_SI_550 0x0550
+#define PCI_DEVICE_ID_SI_540_VGA 0x5300
+#define PCI_DEVICE_ID_SI_550_VGA 0x5315
+#define PCI_DEVICE_ID_SI_601 0x0601
+#define PCI_DEVICE_ID_SI_620 0x0620
+#define PCI_DEVICE_ID_SI_630 0x0630
+#define PCI_DEVICE_ID_SI_635 0x0635
+#define PCI_DEVICE_ID_SI_640 0x0640
+#define PCI_DEVICE_ID_SI_645 0x0645
+#define PCI_DEVICE_ID_SI_646 0x0646
+#define PCI_DEVICE_ID_SI_648 0x0648
+#define PCI_DEVICE_ID_SI_650 0x0650
+#define PCI_DEVICE_ID_SI_651 0x0651
+#define PCI_DEVICE_ID_SI_652 0x0652
+#define PCI_DEVICE_ID_SI_730 0x0730
+#define PCI_DEVICE_ID_SI_630_VGA 0x6300
+#define PCI_DEVICE_ID_SI_730_VGA 0x7300
+#define PCI_DEVICE_ID_SI_735 0x0735
+#define PCI_DEVICE_ID_SI_740 0x0740
+#define PCI_DEVICE_ID_SI_745 0x0745
+#define PCI_DEVICE_ID_SI_746 0x0746
+#define PCI_DEVICE_ID_SI_748 0x0748
+#define PCI_DEVICE_ID_SI_750 0x0750
+#define PCI_DEVICE_ID_SI_751 0x0751
+#define PCI_DEVICE_ID_SI_752 0x0752
+#define PCI_DEVICE_ID_SI_900 0x0900
+#define PCI_DEVICE_ID_SI_5107 0x5107
+#define PCI_DEVICE_ID_SI_5300 0x5300
+#define PCI_DEVICE_ID_SI_5511 0x5511
+#define PCI_DEVICE_ID_SI_5513 0x5513
+#define PCI_DEVICE_ID_SI_5518 0x5518
+#define PCI_DEVICE_ID_SI_5571 0x5571
+#define PCI_DEVICE_ID_SI_5591 0x5591
+#define PCI_DEVICE_ID_SI_5597 0x5597
+#define PCI_DEVICE_ID_SI_5598 0x5598
+#define PCI_DEVICE_ID_SI_5600 0x5600
+#define PCI_DEVICE_ID_SI_6300 0x6300
+#define PCI_DEVICE_ID_SI_6306 0x6306
+#define PCI_DEVICE_ID_SI_6326 0x6326
+#define PCI_DEVICE_ID_SI_7001 0x7001
+#define PCI_DEVICE_ID_SI_7016 0x7016
+
+#define PCI_VENDOR_ID_HP 0x103c
+#define PCI_DEVICE_ID_HP_DONNER_GFX 0x1008
+#define PCI_DEVICE_ID_HP_TACHYON 0x1028
+#define PCI_DEVICE_ID_HP_TACHLITE 0x1029
+#define PCI_DEVICE_ID_HP_J2585A 0x1030
+#define PCI_DEVICE_ID_HP_J2585B 0x1031
+#define PCI_DEVICE_ID_HP_SAS 0x1048
+#define PCI_DEVICE_ID_HP_DIVA1 0x1049
+#define PCI_DEVICE_ID_HP_DIVA2 0x104A
+#define PCI_DEVICE_ID_HP_SP2_0 0x104B
+#define PCI_DEVICE_ID_HP_REO_SBA 0x10f0
+#define PCI_DEVICE_ID_HP_REO_IOC 0x10f1
+#define PCI_DEVICE_ID_HP_ZX1_SBA 0x1229
+#define PCI_DEVICE_ID_HP_ZX1_IOC 0x122a
+#define PCI_DEVICE_ID_HP_ZX1_LBA 0x122e
+
+#define PCI_VENDOR_ID_PCTECH 0x1042
+#define PCI_DEVICE_ID_PCTECH_RZ1000 0x1000
+#define PCI_DEVICE_ID_PCTECH_RZ1001 0x1001
+#define PCI_DEVICE_ID_PCTECH_SAMURAI_0 0x3000
+#define PCI_DEVICE_ID_PCTECH_SAMURAI_1 0x3010
+#define PCI_DEVICE_ID_PCTECH_SAMURAI_IDE 0x3020
+
+#define PCI_VENDOR_ID_ASUSTEK 0x1043
+#define PCI_DEVICE_ID_ASUSTEK_0675 0x0675
+
+#define PCI_VENDOR_ID_DPT 0x1044
+#define PCI_DEVICE_ID_DPT 0xa400
+
+#define PCI_VENDOR_ID_OPTI 0x1045
+#define PCI_DEVICE_ID_OPTI_92C178 0xc178
+#define PCI_DEVICE_ID_OPTI_82C557 0xc557
+#define PCI_DEVICE_ID_OPTI_82C558 0xc558
+#define PCI_DEVICE_ID_OPTI_82C621 0xc621
+#define PCI_DEVICE_ID_OPTI_82C700 0xc700
+#define PCI_DEVICE_ID_OPTI_82C701 0xc701
+#define PCI_DEVICE_ID_OPTI_82C814 0xc814
+#define PCI_DEVICE_ID_OPTI_82C822 0xc822
+#define PCI_DEVICE_ID_OPTI_82C861 0xc861
+#define PCI_DEVICE_ID_OPTI_82C825 0xd568
+
+#define PCI_VENDOR_ID_ELSA 0x1048
+#define PCI_DEVICE_ID_ELSA_MICROLINK 0x1000
+#define PCI_DEVICE_ID_ELSA_QS3000 0x3000
+
+#define PCI_VENDOR_ID_SGS 0x104a
+#define PCI_DEVICE_ID_SGS_2000 0x0008
+#define PCI_DEVICE_ID_SGS_1764 0x0009
+
+#define PCI_VENDOR_ID_BUSLOGIC 0x104B
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER 0x1040
+#define PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT 0x8130
+
+#define PCI_VENDOR_ID_TI 0x104c
+#define PCI_DEVICE_ID_TI_TVP4010 0x3d04
+#define PCI_DEVICE_ID_TI_TVP4020 0x3d07
+#define PCI_DEVICE_ID_TI_1130 0xac12
+#define PCI_DEVICE_ID_TI_1031 0xac13
+#define PCI_DEVICE_ID_TI_1131 0xac15
+#define PCI_DEVICE_ID_TI_1250 0xac16
+#define PCI_DEVICE_ID_TI_1220 0xac17
+#define PCI_DEVICE_ID_TI_1221 0xac19
+#define PCI_DEVICE_ID_TI_1210 0xac1a
+#define PCI_DEVICE_ID_TI_1410 0xac50
+#define PCI_DEVICE_ID_TI_1450 0xac1b
+#define PCI_DEVICE_ID_TI_1225 0xac1c
+#define PCI_DEVICE_ID_TI_1251A 0xac1d
+#define PCI_DEVICE_ID_TI_1211 0xac1e
+#define PCI_DEVICE_ID_TI_1251B 0xac1f
+#define PCI_DEVICE_ID_TI_4410 0xac41
+#define PCI_DEVICE_ID_TI_4451 0xac42
+#define PCI_DEVICE_ID_TI_1420 0xac51
+
+#define PCI_VENDOR_ID_SONY 0x104d
+#define PCI_DEVICE_ID_SONY_CXD3222 0x8039
+
+#define PCI_VENDOR_ID_OAK 0x104e
+#define PCI_DEVICE_ID_OAK_OTI107 0x0107
+
+/* Winbond have two vendor IDs! See 0x10ad as well */
+#define PCI_VENDOR_ID_WINBOND2 0x1050
+#define PCI_DEVICE_ID_WINBOND2_89C940 0x0940
+#define PCI_DEVICE_ID_WINBOND2_89C940F 0x5a5a
+#define PCI_DEVICE_ID_WINBOND2_6692 0x6692
+
+#define PCI_VENDOR_ID_ANIGMA 0x1051
+#define PCI_DEVICE_ID_ANIGMA_MC145575 0x0100
+
+#define PCI_VENDOR_ID_EFAR 0x1055
+#define PCI_DEVICE_ID_EFAR_SLC90E66_1 0x9130
+#define PCI_DEVICE_ID_EFAR_SLC90E66_0 0x9460
+#define PCI_DEVICE_ID_EFAR_SLC90E66_2 0x9462
+#define PCI_DEVICE_ID_EFAR_SLC90E66_3 0x9463
+
+#define PCI_VENDOR_ID_MOTOROLA 0x1057
+#define PCI_VENDOR_ID_MOTOROLA_OOPS 0x1507
+#define PCI_DEVICE_ID_MOTOROLA_MPC105 0x0001
+#define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002
+#define PCI_DEVICE_ID_MOTOROLA_MPC107 0x0004
+#define PCI_DEVICE_ID_MOTOROLA_RAVEN 0x4801
+#define PCI_DEVICE_ID_MOTOROLA_FALCON 0x4802
+#define PCI_DEVICE_ID_MOTOROLA_HAWK 0x4803
+#define PCI_DEVICE_ID_MOTOROLA_CPX8216 0x4806
+
+#define PCI_VENDOR_ID_PROMISE 0x105a
+#define PCI_DEVICE_ID_PROMISE_20265 0x0d30
+#define PCI_DEVICE_ID_PROMISE_20267 0x4d30
+#define PCI_DEVICE_ID_PROMISE_20246 0x4d33
+#define PCI_DEVICE_ID_PROMISE_20262 0x4d38
+#define PCI_DEVICE_ID_PROMISE_20263 0x0D38
+#define PCI_DEVICE_ID_PROMISE_20268 0x4d68
+#define PCI_DEVICE_ID_PROMISE_20270 0x6268
+#define PCI_DEVICE_ID_PROMISE_20269 0x4d69
+#define PCI_DEVICE_ID_PROMISE_20271 0x6269
+#define PCI_DEVICE_ID_PROMISE_20275 0x1275
+#define PCI_DEVICE_ID_PROMISE_20276 0x5275
+#define PCI_DEVICE_ID_PROMISE_20277 0x7275
+#define PCI_DEVICE_ID_PROMISE_5300 0x5300
+
+#define PCI_VENDOR_ID_N9 0x105d
+#define PCI_DEVICE_ID_N9_I128 0x2309
+#define PCI_DEVICE_ID_N9_I128_2 0x2339
+#define PCI_DEVICE_ID_N9_I128_T2R 0x493d
+
+#define PCI_VENDOR_ID_UMC 0x1060
+#define PCI_DEVICE_ID_UMC_UM8673F 0x0101
+#define PCI_DEVICE_ID_UMC_UM8891A 0x0891
+#define PCI_DEVICE_ID_UMC_UM8886BF 0x673a
+#define PCI_DEVICE_ID_UMC_UM8886A 0x886a
+#define PCI_DEVICE_ID_UMC_UM8881F 0x8881
+#define PCI_DEVICE_ID_UMC_UM8886F 0x8886
+#define PCI_DEVICE_ID_UMC_UM9017F 0x9017
+#define PCI_DEVICE_ID_UMC_UM8886N 0xe886
+#define PCI_DEVICE_ID_UMC_UM8891N 0xe891
+
+#define PCI_VENDOR_ID_X 0x1061
+#define PCI_DEVICE_ID_X_AGX016 0x0001
+
+#define PCI_VENDOR_ID_MYLEX 0x1069
+#define PCI_DEVICE_ID_MYLEX_DAC960_P 0x0001
+#define PCI_DEVICE_ID_MYLEX_DAC960_PD 0x0002
+#define PCI_DEVICE_ID_MYLEX_DAC960_PG 0x0010
+#define PCI_DEVICE_ID_MYLEX_DAC960_LA 0x0020
+#define PCI_DEVICE_ID_MYLEX_DAC960_LP 0x0050
+#define PCI_DEVICE_ID_MYLEX_DAC960_BA 0xBA56
+
+#define PCI_VENDOR_ID_PICOP 0x1066
+#define PCI_DEVICE_ID_PICOP_PT86C52X 0x0001
+#define PCI_DEVICE_ID_PICOP_PT80C524 0x8002
+
+#define PCI_VENDOR_ID_APPLE 0x106b
+#define PCI_DEVICE_ID_APPLE_BANDIT 0x0001
+#define PCI_DEVICE_ID_APPLE_GC 0x0002
+#define PCI_DEVICE_ID_APPLE_HYDRA 0x000e
+#define PCI_DEVICE_ID_APPLE_UNI_N_FW 0x0018
+#define PCI_DEVICE_ID_APPLE_KL_USB 0x0019
+#define PCI_DEVICE_ID_APPLE_UNI_N_AGP 0x0020
+#define PCI_DEVICE_ID_APPLE_UNI_N_GMAC 0x0021
+#define PCI_DEVICE_ID_APPLE_KEYLARGO 0x0022
+#define PCI_DEVICE_ID_APPLE_UNI_N_GMACP 0x0024
+#define PCI_DEVICE_ID_APPLE_KEYLARGO_P 0x0025
+#define PCI_DEVICE_ID_APPLE_KL_USB_P 0x0026
+#define PCI_DEVICE_ID_APPLE_UNI_N_AGP_P 0x0027
+#define PCI_DEVICE_ID_APPLE_UNI_N_AGP15 0x002d
+#define PCI_DEVICE_ID_APPLE_UNI_N_FW2 0x0030
+
+#define PCI_VENDOR_ID_YAMAHA 0x1073
+#define PCI_DEVICE_ID_YAMAHA_724 0x0004
+#define PCI_DEVICE_ID_YAMAHA_724F 0x000d
+#define PCI_DEVICE_ID_YAMAHA_740 0x000a
+#define PCI_DEVICE_ID_YAMAHA_740C 0x000c
+#define PCI_DEVICE_ID_YAMAHA_744 0x0010
+#define PCI_DEVICE_ID_YAMAHA_754 0x0012
+
+#define PCI_VENDOR_ID_NEXGEN 0x1074
+#define PCI_DEVICE_ID_NEXGEN_82C501 0x4e78
+
+#define PCI_VENDOR_ID_QLOGIC 0x1077
+#define PCI_DEVICE_ID_QLOGIC_ISP1020 0x1020
+#define PCI_DEVICE_ID_QLOGIC_ISP1022 0x1022
+#define PCI_DEVICE_ID_QLOGIC_ISP2100 0x2100
+#define PCI_DEVICE_ID_QLOGIC_ISP2200 0x2200
+
+#define PCI_VENDOR_ID_CYRIX 0x1078
+#define PCI_DEVICE_ID_CYRIX_5510 0x0000
+#define PCI_DEVICE_ID_CYRIX_PCI_MASTER 0x0001
+#define PCI_DEVICE_ID_CYRIX_5520 0x0002
+#define PCI_DEVICE_ID_CYRIX_5530_LEGACY 0x0100
+#define PCI_DEVICE_ID_CYRIX_5530_SMI 0x0101
+#define PCI_DEVICE_ID_CYRIX_5530_IDE 0x0102
+#define PCI_DEVICE_ID_CYRIX_5530_AUDIO 0x0103
+#define PCI_DEVICE_ID_CYRIX_5530_VIDEO 0x0104
+
+#define PCI_VENDOR_ID_LEADTEK 0x107d
+#define PCI_DEVICE_ID_LEADTEK_805 0x0000
+
+#define PCI_VENDOR_ID_INTERPHASE 0x107e
+#define PCI_DEVICE_ID_INTERPHASE_5526 0x0004
+#define PCI_DEVICE_ID_INTERPHASE_55x6 0x0005
+#define PCI_DEVICE_ID_INTERPHASE_5575 0x0008
+
+#define PCI_VENDOR_ID_CONTAQ 0x1080
+#define PCI_DEVICE_ID_CONTAQ_82C599 0x0600
+#define PCI_DEVICE_ID_CONTAQ_82C693 0xc693
+
+#define PCI_VENDOR_ID_FOREX 0x1083
+
+#define PCI_VENDOR_ID_OLICOM 0x108d
+#define PCI_DEVICE_ID_OLICOM_OC3136 0x0001
+#define PCI_DEVICE_ID_OLICOM_OC2315 0x0011
+#define PCI_DEVICE_ID_OLICOM_OC2325 0x0012
+#define PCI_DEVICE_ID_OLICOM_OC2183 0x0013
+#define PCI_DEVICE_ID_OLICOM_OC2326 0x0014
+#define PCI_DEVICE_ID_OLICOM_OC6151 0x0021
+
+#define PCI_VENDOR_ID_SUN 0x108e
+#define PCI_DEVICE_ID_SUN_EBUS 0x1000
+#define PCI_DEVICE_ID_SUN_HAPPYMEAL 0x1001
+#define PCI_DEVICE_ID_SUN_RIO_EBUS 0x1100
+#define PCI_DEVICE_ID_SUN_RIO_GEM 0x1101
+#define PCI_DEVICE_ID_SUN_RIO_1394 0x1102
+#define PCI_DEVICE_ID_SUN_RIO_USB 0x1103
+#define PCI_DEVICE_ID_SUN_GEM 0x2bad
+#define PCI_DEVICE_ID_SUN_SIMBA 0x5000
+#define PCI_DEVICE_ID_SUN_PBM 0x8000
+#define PCI_DEVICE_ID_SUN_SCHIZO 0x8001
+#define PCI_DEVICE_ID_SUN_SABRE 0xa000
+#define PCI_DEVICE_ID_SUN_HUMMINGBIRD 0xa001
+
+#define PCI_VENDOR_ID_CMD 0x1095
+#define PCI_DEVICE_ID_CMD_640 0x0640
+#define PCI_DEVICE_ID_CMD_643 0x0643
+#define PCI_DEVICE_ID_CMD_646 0x0646
+#define PCI_DEVICE_ID_CMD_647 0x0647
+#define PCI_DEVICE_ID_CMD_648 0x0648
+#define PCI_DEVICE_ID_CMD_649 0x0649
+#define PCI_DEVICE_ID_CMD_670 0x0670
+#define PCI_DEVICE_ID_CMD_680 0x0680
+
+#define PCI_DEVICE_ID_SII_680 0x0680
+#define PCI_DEVICE_ID_SII_3112 0x3112
+
+#define PCI_VENDOR_ID_VISION 0x1098
+#define PCI_DEVICE_ID_VISION_QD8500 0x0001
+#define PCI_DEVICE_ID_VISION_QD8580 0x0002
+
+#define PCI_VENDOR_ID_BROOKTREE 0x109e
+#define PCI_DEVICE_ID_BROOKTREE_848 0x0350
+#define PCI_DEVICE_ID_BROOKTREE_849A 0x0351
+#define PCI_DEVICE_ID_BROOKTREE_878_1 0x036e
+#define PCI_DEVICE_ID_BROOKTREE_878 0x0878
+#define PCI_DEVICE_ID_BROOKTREE_8474 0x8474
+
+#define PCI_VENDOR_ID_SIERRA 0x10a8
+#define PCI_DEVICE_ID_SIERRA_STB 0x0000
+
+#define PCI_VENDOR_ID_SGI 0x10a9
+#define PCI_DEVICE_ID_SGI_IOC3 0x0003
+
+#define PCI_VENDOR_ID_ACC 0x10aa
+#define PCI_DEVICE_ID_ACC_2056 0x0000
+
+#define PCI_VENDOR_ID_WINBOND 0x10ad
+#define PCI_DEVICE_ID_WINBOND_83769 0x0001
+#define PCI_DEVICE_ID_WINBOND_82C105 0x0105
+#define PCI_DEVICE_ID_WINBOND_83C553 0x0565
+
+#define PCI_VENDOR_ID_DATABOOK 0x10b3
+#define PCI_DEVICE_ID_DATABOOK_87144 0xb106
+
+#define PCI_VENDOR_ID_PLX 0x10b5
+#define PCI_DEVICE_ID_PLX_R685 0x1030
+#define PCI_DEVICE_ID_PLX_ROMULUS 0x106a
+#define PCI_DEVICE_ID_PLX_SPCOM800 0x1076
+#define PCI_DEVICE_ID_PLX_1077 0x1077
+#define PCI_DEVICE_ID_PLX_SPCOM200 0x1103
+#define PCI_DEVICE_ID_PLX_DJINN_ITOO 0x1151
+#define PCI_DEVICE_ID_PLX_R753 0x1152
+#define PCI_DEVICE_ID_PLX_9050 0x9050
+#define PCI_DEVICE_ID_PLX_9060 0x9060
+#define PCI_DEVICE_ID_PLX_9060ES 0x906E
+#define PCI_DEVICE_ID_PLX_9060SD 0x906D
+#define PCI_DEVICE_ID_PLX_9080 0x9080
+#define PCI_DEVICE_ID_PLX_GTEK_SERIAL2 0xa001
+
+#define PCI_VENDOR_ID_MADGE 0x10b6
+#define PCI_DEVICE_ID_MADGE_MK2 0x0002
+#define PCI_DEVICE_ID_MADGE_C155S 0x1001
+
+#define PCI_VENDOR_ID_3COM 0x10b7
+#define PCI_DEVICE_ID_3COM_3C985 0x0001
+#define PCI_DEVICE_ID_3COM_3C339 0x3390
+#define PCI_DEVICE_ID_3COM_3C590 0x5900
+#define PCI_DEVICE_ID_3COM_3C595TX 0x5950
+#define PCI_DEVICE_ID_3COM_3C595T4 0x5951
+#define PCI_DEVICE_ID_3COM_3C595MII 0x5952
+#define PCI_DEVICE_ID_3COM_3C900TPO 0x9000
+#define PCI_DEVICE_ID_3COM_3C900COMBO 0x9001
+#define PCI_DEVICE_ID_3COM_3C905TX 0x9050
+#define PCI_DEVICE_ID_3COM_3C905T4 0x9051
+#define PCI_DEVICE_ID_3COM_3C905B_TX 0x9055
+
+#define PCI_VENDOR_ID_SMC 0x10b8
+#define PCI_DEVICE_ID_SMC_EPIC100 0x0005
+
+#define PCI_VENDOR_ID_AL 0x10b9
+#define PCI_DEVICE_ID_AL_M1445 0x1445
+#define PCI_DEVICE_ID_AL_M1449 0x1449
+#define PCI_DEVICE_ID_AL_M1451 0x1451
+#define PCI_DEVICE_ID_AL_M1461 0x1461
+#define PCI_DEVICE_ID_AL_M1489 0x1489
+#define PCI_DEVICE_ID_AL_M1511 0x1511
+#define PCI_DEVICE_ID_AL_M1513 0x1513
+#define PCI_DEVICE_ID_AL_M1521 0x1521
+#define PCI_DEVICE_ID_AL_M1523 0x1523
+#define PCI_DEVICE_ID_AL_M1531 0x1531
+#define PCI_DEVICE_ID_AL_M1533 0x1533
+#define PCI_DEVICE_ID_AL_M1535 0x1535
+#define PCI_DEVICE_ID_AL_M1541 0x1541
+#define PCI_DEVICE_ID_AL_M1621 0x1621
+#define PCI_DEVICE_ID_AL_M1631 0x1631
+#define PCI_DEVICE_ID_AL_M1641 0x1641
+#define PCI_DEVICE_ID_AL_M1644 0x1644
+#define PCI_DEVICE_ID_AL_M1647 0x1647
+#define PCI_DEVICE_ID_AL_M1651 0x1651
+#define PCI_DEVICE_ID_AL_M1543 0x1543
+#define PCI_DEVICE_ID_AL_M3307 0x3307
+#define PCI_DEVICE_ID_AL_M4803 0x5215
+#define PCI_DEVICE_ID_AL_M5219 0x5219
+#define PCI_DEVICE_ID_AL_M5229 0x5229
+#define PCI_DEVICE_ID_AL_M5237 0x5237
+#define PCI_DEVICE_ID_AL_M5243 0x5243
+#define PCI_DEVICE_ID_AL_M5451 0x5451
+#define PCI_DEVICE_ID_AL_M7101 0x7101
+
+#define PCI_VENDOR_ID_MITSUBISHI 0x10ba
+
+#define PCI_VENDOR_ID_SURECOM 0x10bd
+#define PCI_DEVICE_ID_SURECOM_NE34 0x0e34
+
+#define PCI_VENDOR_ID_NEOMAGIC 0x10c8
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2070 0x0001
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128V 0x0002
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZV 0x0003
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2160 0x0004
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICMEDIA_256AV 0x0005
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZVPLUS 0x0083
+
+#define PCI_VENDOR_ID_ASP 0x10cd
+#define PCI_DEVICE_ID_ASP_ABP940 0x1200
+#define PCI_DEVICE_ID_ASP_ABP940U 0x1300
+#define PCI_DEVICE_ID_ASP_ABP940UW 0x2300
+
+#define PCI_VENDOR_ID_MACRONIX 0x10d9
+#define PCI_DEVICE_ID_MACRONIX_MX98713 0x0512
+#define PCI_DEVICE_ID_MACRONIX_MX987x5 0x0531
+
+#define PCI_VENDOR_ID_TCONRAD 0x10da
+#define PCI_DEVICE_ID_TCONRAD_TOKENRING 0x0508
+
+#define PCI_VENDOR_ID_CERN 0x10dc
+#define PCI_DEVICE_ID_CERN_SPSB_PMC 0x0001
+#define PCI_DEVICE_ID_CERN_SPSB_PCI 0x0002
+#define PCI_DEVICE_ID_CERN_HIPPI_DST 0x0021
+#define PCI_DEVICE_ID_CERN_HIPPI_SRC 0x0022
+
+#define PCI_VENDOR_ID_NVIDIA 0x10de
+#define PCI_DEVICE_ID_NVIDIA_TNT 0x0020
+#define PCI_DEVICE_ID_NVIDIA_TNT2 0x0028
+#define PCI_DEVICE_ID_NVIDIA_UTNT2 0x0029
+#define PCI_DEVICE_ID_NVIDIA_VTNT2 0x002C
+#define PCI_DEVICE_ID_NVIDIA_UVTNT2 0x002D
+#define PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE 0x0065
+#define PCI_DEVICE_ID_NVIDIA_ITNT2 0x00A0
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE_SDR 0x0100
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE_DDR 0x0101
+#define PCI_DEVICE_ID_NVIDIA_QUADRO 0x0103
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX 0x0110
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX2 0x0111
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GO 0x0112
+#define PCI_DEVICE_ID_NVIDIA_QUADRO2_MXR 0x0113
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS 0x0150
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS2 0x0151
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_ULTRA 0x0152
+#define PCI_DEVICE_ID_NVIDIA_QUADRO2_PRO 0x0153
+#define PCI_DEVICE_ID_NVIDIA_IGEFORCE2 0x01a0
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_IDE 0x01bc
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE3 0x0200
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_1 0x0201
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_2 0x0202
+#define PCI_DEVICE_ID_NVIDIA_QUADRO_DDC 0x0203
+
+#define PCI_VENDOR_ID_IMS 0x10e0
+#define PCI_DEVICE_ID_IMS_8849 0x8849
+#define PCI_DEVICE_ID_IMS_TT128 0x9128
+#define PCI_DEVICE_ID_IMS_TT3D 0x9135
+
+#define PCI_VENDOR_ID_TEKRAM2 0x10e1
+#define PCI_DEVICE_ID_TEKRAM2_690c 0x690c
+
+#define PCI_VENDOR_ID_TUNDRA 0x10e3
+#define PCI_DEVICE_ID_TUNDRA_CA91C042 0x0000
+
+#define PCI_VENDOR_ID_AMCC 0x10e8
+#define PCI_DEVICE_ID_AMCC_MYRINET 0x8043
+#define PCI_DEVICE_ID_AMCC_PARASTATION 0x8062
+#define PCI_DEVICE_ID_AMCC_S5933 0x807d
+#define PCI_DEVICE_ID_AMCC_S5933_HEPC3 0x809c
+
+#define PCI_VENDOR_ID_INTERG 0x10ea
+#define PCI_DEVICE_ID_INTERG_1680 0x1680
+#define PCI_DEVICE_ID_INTERG_1682 0x1682
+#define PCI_DEVICE_ID_INTERG_2000 0x2000
+#define PCI_DEVICE_ID_INTERG_2010 0x2010
+#define PCI_DEVICE_ID_INTERG_5000 0x5000
+#define PCI_DEVICE_ID_INTERG_5050 0x5050
+
+#define PCI_VENDOR_ID_REALTEK 0x10ec
+#define PCI_DEVICE_ID_REALTEK_8029 0x8029
+#define PCI_DEVICE_ID_REALTEK_8129 0x8129
+#define PCI_DEVICE_ID_REALTEK_8139 0x8139
+#define PCI_DEVICE_ID_REALTEK_8169 0x8169
+
+#define PCI_VENDOR_ID_XILINX 0x10ee
+#define PCI_DEVICE_ID_TURBOPAM 0x4020
+
+#define PCI_VENDOR_ID_TRUEVISION 0x10fa
+#define PCI_DEVICE_ID_TRUEVISION_T1000 0x000c
+
+#define PCI_VENDOR_ID_INIT 0x1101
+#define PCI_DEVICE_ID_INIT_320P 0x9100
+#define PCI_DEVICE_ID_INIT_360P 0x9500
+
+#define PCI_VENDOR_ID_CREATIVE 0x1102 // duplicate: ECTIVA
+#define PCI_DEVICE_ID_CREATIVE_EMU10K1 0x0002
+
+#define PCI_VENDOR_ID_ECTIVA 0x1102 // duplicate: CREATIVE
+#define PCI_DEVICE_ID_ECTIVA_EV1938 0x8938
+
+#define PCI_VENDOR_ID_TTI 0x1103
+#define PCI_DEVICE_ID_TTI_HPT343 0x0003
+#define PCI_DEVICE_ID_TTI_HPT366 0x0004
+#define PCI_DEVICE_ID_TTI_HPT372 0x0005
+#define PCI_DEVICE_ID_TTI_HPT302 0x0006
+#define PCI_DEVICE_ID_TTI_HPT371 0x0007
+#define PCI_DEVICE_ID_TTI_HPT374 0x0008
+
+#define PCI_VENDOR_ID_VIA 0x1106
+#define PCI_DEVICE_ID_VIA_8363_0 0x0305
+#define PCI_DEVICE_ID_VIA_8371_0 0x0391
+#define PCI_DEVICE_ID_VIA_8501_0 0x0501
+#define PCI_DEVICE_ID_VIA_82C505 0x0505
+#define PCI_DEVICE_ID_VIA_82C561 0x0561
+#define PCI_DEVICE_ID_VIA_82C586_1 0x0571
+#define PCI_DEVICE_ID_VIA_82C576 0x0576
+#define PCI_DEVICE_ID_VIA_82C585 0x0585
+#define PCI_DEVICE_ID_VIA_82C586_0 0x0586
+#define PCI_DEVICE_ID_VIA_82C595 0x0595
+#define PCI_DEVICE_ID_VIA_82C596 0x0596
+#define PCI_DEVICE_ID_VIA_82C597_0 0x0597
+#define PCI_DEVICE_ID_VIA_82C598_0 0x0598
+#define PCI_DEVICE_ID_VIA_8601_0 0x0601
+#define PCI_DEVICE_ID_VIA_8605_0 0x0605
+#define PCI_DEVICE_ID_VIA_82C680 0x0680
+#define PCI_DEVICE_ID_VIA_82C686 0x0686
+#define PCI_DEVICE_ID_VIA_82C691 0x0691
+#define PCI_DEVICE_ID_VIA_82C693 0x0693
+#define PCI_DEVICE_ID_VIA_82C693_1 0x0698
+#define PCI_DEVICE_ID_VIA_82C926 0x0926
+#define PCI_DEVICE_ID_VIA_82C576_1 0x1571
+#define PCI_DEVICE_ID_VIA_82C595_97 0x1595
+#define PCI_DEVICE_ID_VIA_82C586_2 0x3038
+#define PCI_DEVICE_ID_VIA_82C586_3 0x3040
+#define PCI_DEVICE_ID_VIA_6305 0x3044
+#define PCI_DEVICE_ID_VIA_82C596_3 0x3050
+#define PCI_DEVICE_ID_VIA_82C596B_3 0x3051
+#define PCI_DEVICE_ID_VIA_82C686_4 0x3057
+#define PCI_DEVICE_ID_VIA_82C686_5 0x3058
+#define PCI_DEVICE_ID_VIA_8233_5 0x3059
+#define PCI_DEVICE_ID_VIA_8233_7 0x3065
+#define PCI_DEVICE_ID_VIA_82C686_6 0x3068
+#define PCI_DEVICE_ID_VIA_8233_0 0x3074
+#define PCI_DEVICE_ID_VIA_8633_0 0x3091
+#define PCI_DEVICE_ID_VIA_8367_0 0x3099
+#define PCI_DEVICE_ID_VIA_8622 0x3102
+#define PCI_DEVICE_ID_VIA_8233C_0 0x3109
+#define PCI_DEVICE_ID_VIA_8361 0x3112
+#define PCI_DEVICE_ID_VIA_8233A 0x3147
+#define PCI_DEVICE_ID_VIA_P4X333 0x3168
+#define PCI_DEVICE_ID_VIA_8235 0x3177
+#define PCI_DEVICE_ID_VIA_8377_0 0x3189
+#define PCI_DEVICE_ID_VIA_86C100A 0x6100
+#define PCI_DEVICE_ID_VIA_8231 0x8231
+#define PCI_DEVICE_ID_VIA_8231_4 0x8235
+#define PCI_DEVICE_ID_VIA_8365_1 0x8305
+#define PCI_DEVICE_ID_VIA_8371_1 0x8391
+#define PCI_DEVICE_ID_VIA_8501_1 0x8501
+#define PCI_DEVICE_ID_VIA_82C597_1 0x8597
+#define PCI_DEVICE_ID_VIA_82C598_1 0x8598
+#define PCI_DEVICE_ID_VIA_8601_1 0x8601
+#define PCI_DEVICE_ID_VIA_8505_1 0x8605
+#define PCI_DEVICE_ID_VIA_8633_1 0xB091
+#define PCI_DEVICE_ID_VIA_8367_1 0xB099
+
+#define PCI_VENDOR_ID_SIEMENS 0x110A
+#define PCI_DEVICE_ID_SIEMENS_DSCC4 0x2102
+
+#define PCI_VENDOR_ID_SMC2 0x1113
+#define PCI_DEVICE_ID_SMC2_1211TX 0x1211
+
+#define PCI_VENDOR_ID_VORTEX 0x1119
+#define PCI_DEVICE_ID_VORTEX_GDT60x0 0x0000
+#define PCI_DEVICE_ID_VORTEX_GDT6000B 0x0001
+#define PCI_DEVICE_ID_VORTEX_GDT6x10 0x0002
+#define PCI_DEVICE_ID_VORTEX_GDT6x20 0x0003
+#define PCI_DEVICE_ID_VORTEX_GDT6530 0x0004
+#define PCI_DEVICE_ID_VORTEX_GDT6550 0x0005
+#define PCI_DEVICE_ID_VORTEX_GDT6x17 0x0006
+#define PCI_DEVICE_ID_VORTEX_GDT6x27 0x0007
+#define PCI_DEVICE_ID_VORTEX_GDT6537 0x0008
+#define PCI_DEVICE_ID_VORTEX_GDT6557 0x0009
+#define PCI_DEVICE_ID_VORTEX_GDT6x15 0x000a
+#define PCI_DEVICE_ID_VORTEX_GDT6x25 0x000b
+#define PCI_DEVICE_ID_VORTEX_GDT6535 0x000c
+#define PCI_DEVICE_ID_VORTEX_GDT6555 0x000d
+#define PCI_DEVICE_ID_VORTEX_GDT6x17RP 0x0100
+#define PCI_DEVICE_ID_VORTEX_GDT6x27RP 0x0101
+#define PCI_DEVICE_ID_VORTEX_GDT6537RP 0x0102
+#define PCI_DEVICE_ID_VORTEX_GDT6557RP 0x0103
+#define PCI_DEVICE_ID_VORTEX_GDT6x11RP 0x0104
+#define PCI_DEVICE_ID_VORTEX_GDT6x21RP 0x0105
+#define PCI_DEVICE_ID_VORTEX_GDT6x17RP1 0x0110
+#define PCI_DEVICE_ID_VORTEX_GDT6x27RP1 0x0111
+#define PCI_DEVICE_ID_VORTEX_GDT6537RP1 0x0112
+#define PCI_DEVICE_ID_VORTEX_GDT6557RP1 0x0113
+#define PCI_DEVICE_ID_VORTEX_GDT6x11RP1 0x0114
+#define PCI_DEVICE_ID_VORTEX_GDT6x21RP1 0x0115
+#define PCI_DEVICE_ID_VORTEX_GDT6x17RP2 0x0120
+#define PCI_DEVICE_ID_VORTEX_GDT6x27RP2 0x0121
+#define PCI_DEVICE_ID_VORTEX_GDT6537RP2 0x0122
+#define PCI_DEVICE_ID_VORTEX_GDT6557RP2 0x0123
+#define PCI_DEVICE_ID_VORTEX_GDT6x11RP2 0x0124
+#define PCI_DEVICE_ID_VORTEX_GDT6x21RP2 0x0125
+
+#define PCI_VENDOR_ID_EF 0x111a
+#define PCI_DEVICE_ID_EF_ATM_FPGA 0x0000
+#define PCI_DEVICE_ID_EF_ATM_ASIC 0x0002
+
+#define PCI_VENDOR_ID_IDT 0x111d
+#define PCI_DEVICE_ID_IDT_IDT77201 0x0001
+
+#define PCI_VENDOR_ID_FORE 0x1127
+#define PCI_DEVICE_ID_FORE_PCA200PC 0x0210
+#define PCI_DEVICE_ID_FORE_PCA200E 0x0300
+
+#define PCI_VENDOR_ID_IMAGINGTECH 0x112f
+#define PCI_DEVICE_ID_IMAGINGTECH_ICPCI 0x0000
+
+#define PCI_VENDOR_ID_PHILIPS 0x1131
+#define PCI_DEVICE_ID_PHILIPS_SAA7145 0x7145
+#define PCI_DEVICE_ID_PHILIPS_SAA7146 0x7146
+#define PCI_DEVICE_ID_PHILIPS_SAA9730 0x9730
+
+#define PCI_VENDOR_ID_EICON 0x1133
+#define PCI_DEVICE_ID_EICON_DIVA20PRO 0xe001
+#define PCI_DEVICE_ID_EICON_DIVA20 0xe002
+#define PCI_DEVICE_ID_EICON_DIVA20PRO_U 0xe003
+#define PCI_DEVICE_ID_EICON_DIVA20_U 0xe004
+#define PCI_DEVICE_ID_EICON_DIVA201 0xe005
+#define PCI_DEVICE_ID_EICON_DIVA202 0xe00b
+#define PCI_DEVICE_ID_EICON_MAESTRA 0xe010
+#define PCI_DEVICE_ID_EICON_MAESTRAQ 0xe012
+#define PCI_DEVICE_ID_EICON_MAESTRAQ_U 0xe013
+#define PCI_DEVICE_ID_EICON_MAESTRAP 0xe014
+
+#define PCI_VENDOR_ID_CYCLONE 0x113c
+#define PCI_DEVICE_ID_CYCLONE_SDK 0x0001
+
+#define PCI_VENDOR_ID_ALLIANCE 0x1142
+#define PCI_DEVICE_ID_ALLIANCE_PROMOTIO 0x3210
+#define PCI_DEVICE_ID_ALLIANCE_PROVIDEO 0x6422
+#define PCI_DEVICE_ID_ALLIANCE_AT24 0x6424
+#define PCI_DEVICE_ID_ALLIANCE_AT3D 0x643d
+
+#define PCI_VENDOR_ID_SYSKONNECT 0x1148
+#define PCI_DEVICE_ID_SYSKONNECT_FP 0x4000
+#define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200
+#define PCI_DEVICE_ID_SYSKONNECT_GE 0x4300
+
+#define PCI_VENDOR_ID_VMIC 0x114a
+#define PCI_DEVICE_ID_VMIC_VME 0x7587
+
+#define PCI_VENDOR_ID_DIGI 0x114f
+#define PCI_DEVICE_ID_DIGI_EPC 0x0002
+#define PCI_DEVICE_ID_DIGI_RIGHTSWITCH 0x0003
+#define PCI_DEVICE_ID_DIGI_XEM 0x0004
+#define PCI_DEVICE_ID_DIGI_XR 0x0005
+#define PCI_DEVICE_ID_DIGI_CX 0x0006
+#define PCI_DEVICE_ID_DIGI_XRJ 0x0009
+#define PCI_DEVICE_ID_DIGI_EPCJ 0x000a
+#define PCI_DEVICE_ID_DIGI_XR_920 0x0027
+#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_E 0x0070
+#define PCI_DEVICE_ID_DIGI_DF_M_E 0x0071
+#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_A 0x0072
+#define PCI_DEVICE_ID_DIGI_DF_M_A 0x0073
+
+#define PCI_VENDOR_ID_MUTECH 0x1159
+#define PCI_DEVICE_ID_MUTECH_MV1000 0x0001
+
+#define PCI_VENDOR_ID_XIRCOM 0x115d
+#define PCI_DEVICE_ID_XIRCOM_X3201_ETH 0x0003
+#define PCI_DEVICE_ID_XIRCOM_X3201_MDM 0x0103
+
+#define PCI_VENDOR_ID_RENDITION 0x1163
+#define PCI_DEVICE_ID_RENDITION_VERITE 0x0001
+#define PCI_DEVICE_ID_RENDITION_VERITE2100 0x2000
+
+#define PCI_VENDOR_ID_SERVERWORKS 0x1166
+#define PCI_DEVICE_ID_SERVERWORKS_HE 0x0008
+#define PCI_DEVICE_ID_SERVERWORKS_LE 0x0009
+#define PCI_DEVICE_ID_SERVERWORKS_CIOB30 0x0010
+#define PCI_DEVICE_ID_SERVERWORKS_CMIC_HE 0x0011
+#define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017
+#define PCI_DEVICE_ID_SERVERWORKS_OSB4 0x0200
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5 0x0201
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6 0x0203
+#define PCI_DEVICE_ID_SERVERWORKS_OSB4IDE 0x0211
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5IDE 0x0212
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE 0x0213
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2 0x0217
+#define PCI_DEVICE_ID_SERVERWORKS_OSB4USB 0x0220
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5USB PCI_DEVICE_ID_SERVERWORKS_OSB4USB
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6USB 0x0221
+#define PCI_DEVICE_ID_SERVERWORKS_GCLE 0x0225
+#define PCI_DEVICE_ID_SERVERWORKS_GCLE2 0x0227
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5ISA 0x0230
+
+#define PCI_VENDOR_ID_SBE 0x1176
+#define PCI_DEVICE_ID_SBE_WANXL100 0x0301
+#define PCI_DEVICE_ID_SBE_WANXL200 0x0302
+#define PCI_DEVICE_ID_SBE_WANXL400 0x0104
+
+#define PCI_VENDOR_ID_TOSHIBA 0x1179
+#define PCI_DEVICE_ID_TOSHIBA_601 0x0601
+#define PCI_DEVICE_ID_TOSHIBA_TOPIC95 0x060a
+#define PCI_DEVICE_ID_TOSHIBA_TOPIC97 0x060f
+
+#define PCI_VENDOR_ID_TOSHIBA_2 0x102f
+#define PCI_DEVICE_ID_TOSHIBA_TX3927 0x000a
+#define PCI_DEVICE_ID_TOSHIBA_TC35815CF 0x0030
+#define PCI_DEVICE_ID_TOSHIBA_TX4927 0x0180
+
+#define PCI_VENDOR_ID_RICOH 0x1180
+#define PCI_DEVICE_ID_RICOH_RL5C465 0x0465
+#define PCI_DEVICE_ID_RICOH_RL5C466 0x0466
+#define PCI_DEVICE_ID_RICOH_RL5C475 0x0475
+#define PCI_DEVICE_ID_RICOH_RL5C476 0x0476
+#define PCI_DEVICE_ID_RICOH_RL5C478 0x0478
+
+#define PCI_VENDOR_ID_ARTOP 0x1191
+#define PCI_DEVICE_ID_ARTOP_ATP8400 0x0004
+#define PCI_DEVICE_ID_ARTOP_ATP850UF 0x0005
+#define PCI_DEVICE_ID_ARTOP_ATP860 0x0006
+#define PCI_DEVICE_ID_ARTOP_ATP860R 0x0007
+#define PCI_DEVICE_ID_ARTOP_ATP865 0x0008
+#define PCI_DEVICE_ID_ARTOP_ATP865R 0x0009
+#define PCI_DEVICE_ID_ARTOP_AEC7610 0x8002
+#define PCI_DEVICE_ID_ARTOP_AEC7612UW 0x8010
+#define PCI_DEVICE_ID_ARTOP_AEC7612U 0x8020
+#define PCI_DEVICE_ID_ARTOP_AEC7612S 0x8030
+#define PCI_DEVICE_ID_ARTOP_AEC7612D 0x8040
+#define PCI_DEVICE_ID_ARTOP_AEC7612SUW 0x8050
+#define PCI_DEVICE_ID_ARTOP_8060 0x8060
+
+#define PCI_VENDOR_ID_ZEITNET 0x1193
+#define PCI_DEVICE_ID_ZEITNET_1221 0x0001
+#define PCI_DEVICE_ID_ZEITNET_1225 0x0002
+
+#define PCI_VENDOR_ID_OMEGA 0x119b
+#define PCI_DEVICE_ID_OMEGA_82C092G 0x1221
+
+#define PCI_VENDOR_ID_FUJITSU_ME 0x119e
+#define PCI_DEVICE_ID_FUJITSU_FS155 0x0001
+#define PCI_DEVICE_ID_FUJITSU_FS50 0x0003
+
+#define PCI_SUBVENDOR_ID_KEYSPAN 0x11a9
+#define PCI_SUBDEVICE_ID_KEYSPAN_SX2 0x5334
+
+#define PCI_VENDOR_ID_GALILEO 0x11ab
+#define PCI_DEVICE_ID_GALILEO_GT64011 0x4146
+#define PCI_DEVICE_ID_GALILEO_GT64111 0x4146
+#define PCI_DEVICE_ID_GALILEO_GT96100 0x9652
+#define PCI_DEVICE_ID_GALILEO_GT96100A 0x9653
+
+#define PCI_VENDOR_ID_LITEON 0x11ad
+#define PCI_DEVICE_ID_LITEON_LNE100TX 0x0002
+
+#define PCI_VENDOR_ID_V3 0x11b0
+#define PCI_DEVICE_ID_V3_V960 0x0001
+#define PCI_DEVICE_ID_V3_V350 0x0001
+#define PCI_DEVICE_ID_V3_V961 0x0002
+#define PCI_DEVICE_ID_V3_V351 0x0002
+
+#define PCI_VENDOR_ID_NP 0x11bc
+#define PCI_DEVICE_ID_NP_PCI_FDDI 0x0001
+
+#define PCI_VENDOR_ID_ATT 0x11c1
+#define PCI_DEVICE_ID_ATT_L56XMF 0x0440
+#define PCI_DEVICE_ID_ATT_VENUS_MODEM 0x480
+
+#define PCI_VENDOR_ID_SPECIALIX 0x11cb
+#define PCI_DEVICE_ID_SPECIALIX_IO8 0x2000
+#define PCI_DEVICE_ID_SPECIALIX_XIO 0x4000
+#define PCI_DEVICE_ID_SPECIALIX_RIO 0x8000
+#define PCI_SUBDEVICE_ID_SPECIALIX_SPEED4 0xa004
+
+#define PCI_VENDOR_ID_AURAVISION 0x11d1
+#define PCI_DEVICE_ID_AURAVISION_VXP524 0x01f7
+
+#define PCI_VENDOR_ID_ANALOG_DEVICES 0x11d4
+#define PCI_DEVICE_ID_AD1889JS 0x1889
+
+#define PCI_VENDOR_ID_IKON 0x11d5
+#define PCI_DEVICE_ID_IKON_10115 0x0115
+#define PCI_DEVICE_ID_IKON_10117 0x0117
+
+#define PCI_VENDOR_ID_ZORAN 0x11de
+#define PCI_DEVICE_ID_ZORAN_36057 0x6057
+#define PCI_DEVICE_ID_ZORAN_36120 0x6120
+
+#define PCI_VENDOR_ID_KINETIC 0x11f4
+#define PCI_DEVICE_ID_KINETIC_2915 0x2915
+
+#define PCI_VENDOR_ID_COMPEX 0x11f6
+#define PCI_DEVICE_ID_COMPEX_ENET100VG4 0x0112
+#define PCI_DEVICE_ID_COMPEX_RL2000 0x1401
+
+#define PCI_VENDOR_ID_RP 0x11fe
+#define PCI_DEVICE_ID_RP32INTF 0x0001
+#define PCI_DEVICE_ID_RP8INTF 0x0002
+#define PCI_DEVICE_ID_RP16INTF 0x0003
+#define PCI_DEVICE_ID_RP4QUAD 0x0004
+#define PCI_DEVICE_ID_RP8OCTA 0x0005
+#define PCI_DEVICE_ID_RP8J 0x0006
+#define PCI_DEVICE_ID_RPP4 0x000A
+#define PCI_DEVICE_ID_RPP8 0x000B
+#define PCI_DEVICE_ID_RP8M 0x000C
+
+#define PCI_VENDOR_ID_CYCLADES 0x120e
+#define PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100
+#define PCI_DEVICE_ID_CYCLOM_Y_Hi 0x0101
+#define PCI_DEVICE_ID_CYCLOM_4Y_Lo 0x0102
+#define PCI_DEVICE_ID_CYCLOM_4Y_Hi 0x0103
+#define PCI_DEVICE_ID_CYCLOM_8Y_Lo 0x0104
+#define PCI_DEVICE_ID_CYCLOM_8Y_Hi 0x0105
+#define PCI_DEVICE_ID_CYCLOM_Z_Lo 0x0200
+#define PCI_DEVICE_ID_CYCLOM_Z_Hi 0x0201
+#define PCI_DEVICE_ID_PC300_RX_2 0x0300
+#define PCI_DEVICE_ID_PC300_RX_1 0x0301
+#define PCI_DEVICE_ID_PC300_TE_2 0x0310
+#define PCI_DEVICE_ID_PC300_TE_1 0x0311
+
+#define PCI_VENDOR_ID_ESSENTIAL 0x120f
+#define PCI_DEVICE_ID_ESSENTIAL_ROADRUNNER 0x0001
+
+#define PCI_VENDOR_ID_O2 0x1217
+#define PCI_DEVICE_ID_O2_6729 0x6729
+#define PCI_DEVICE_ID_O2_6730 0x673a
+#define PCI_DEVICE_ID_O2_6832 0x6832
+#define PCI_DEVICE_ID_O2_6836 0x6836
+
+#define PCI_VENDOR_ID_3DFX 0x121a
+#define PCI_DEVICE_ID_3DFX_VOODOO 0x0001
+#define PCI_DEVICE_ID_3DFX_VOODOO2 0x0002
+#define PCI_DEVICE_ID_3DFX_BANSHEE 0x0003
+#define PCI_DEVICE_ID_3DFX_VOODOO3 0x0005
+
+#define PCI_VENDOR_ID_SIGMADES 0x1236
+#define PCI_DEVICE_ID_SIGMADES_6425 0x6401
+
+#define PCI_VENDOR_ID_CCUBE 0x123f
+
+#define PCI_VENDOR_ID_AVM 0x1244
+#define PCI_DEVICE_ID_AVM_B1 0x0700
+#define PCI_DEVICE_ID_AVM_C4 0x0800
+#define PCI_DEVICE_ID_AVM_A1 0x0a00
+#define PCI_DEVICE_ID_AVM_A1_V2 0x0e00
+#define PCI_DEVICE_ID_AVM_C2 0x1100
+#define PCI_DEVICE_ID_AVM_T1 0x1200
+
+#define PCI_VENDOR_ID_DIPIX 0x1246
+
+#define PCI_VENDOR_ID_STALLION 0x124d
+#define PCI_DEVICE_ID_STALLION_ECHPCI832 0x0000
+#define PCI_DEVICE_ID_STALLION_ECHPCI864 0x0002
+#define PCI_DEVICE_ID_STALLION_EIOPCI 0x0003
+
+#define PCI_VENDOR_ID_OPTIBASE 0x1255
+#define PCI_DEVICE_ID_OPTIBASE_FORGE 0x1110
+#define PCI_DEVICE_ID_OPTIBASE_FUSION 0x1210
+#define PCI_DEVICE_ID_OPTIBASE_VPLEX 0x2110
+#define PCI_DEVICE_ID_OPTIBASE_VPLEXCC 0x2120
+#define PCI_DEVICE_ID_OPTIBASE_VQUEST 0x2130
+
+#define PCI_VENDOR_ID_ESS 0x125d
+#define PCI_DEVICE_ID_ESS_ESS1968 0x1968
+#define PCI_DEVICE_ID_ESS_AUDIOPCI 0x1969
+#define PCI_DEVICE_ID_ESS_ESS1978 0x1978
+
+#define PCI_VENDOR_ID_SATSAGEM 0x1267
+#define PCI_DEVICE_ID_SATSAGEM_NICCY 0x1016
+#define PCI_DEVICE_ID_SATSAGEM_PCR2101 0x5352
+#define PCI_DEVICE_ID_SATSAGEM_TELSATTURBO 0x5a4b
+
+#define PCI_VENDOR_ID_HUGHES 0x1273
+#define PCI_DEVICE_ID_HUGHES_DIRECPC 0x0002
+
+#define PCI_VENDOR_ID_ENSONIQ 0x1274
+#define PCI_DEVICE_ID_ENSONIQ_CT5880 0x5880
+#define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000
+#define PCI_DEVICE_ID_ENSONIQ_ES1371 0x1371
+
+#define PCI_VENDOR_ID_ROCKWELL 0x127A
+
+#define PCI_VENDOR_ID_ITE 0x1283
+#define PCI_DEVICE_ID_ITE_IT8172G 0x8172
+#define PCI_DEVICE_ID_ITE_IT8172G_AUDIO 0x0801
+#define PCI_DEVICE_ID_ITE_8872 0x8872
+
+#define PCI_DEVICE_ID_ITE_IT8330G_0 0xe886
+
+/* formerly Platform Tech */
+#define PCI_VENDOR_ID_ESS_OLD 0x1285
+#define PCI_DEVICE_ID_ESS_ESS0100 0x0100
+
+#define PCI_VENDOR_ID_ALTEON 0x12ae
+#define PCI_DEVICE_ID_ALTEON_ACENIC 0x0001
+
+#define PCI_VENDOR_ID_USR 0x12B9
+
+#define PCI_SUBVENDOR_ID_CONNECT_TECH 0x12c4
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_232 0x0001
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_232 0x0002
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_232 0x0003
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485 0x0004
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_4_4 0x0005
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485 0x0006
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485_2_2 0x0007
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_485 0x0008
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_2_6 0x0009
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH081101V1 0x000A
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH041101V1 0x000B
+
+#define PCI_VENDOR_ID_PICTUREL 0x12c5
+#define PCI_DEVICE_ID_PICTUREL_PCIVST 0x0081
+
+#define PCI_VENDOR_ID_NVIDIA_SGS 0x12d2
+#define PCI_DEVICE_ID_NVIDIA_SGS_RIVA128 0x0018
+
+#define PCI_SUBVENDOR_ID_CHASE_PCIFAST 0x12E0
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST4 0x0031
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST8 0x0021
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16 0x0011
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16FMC 0x0041
+#define PCI_SUBVENDOR_ID_CHASE_PCIRAS 0x124D
+#define PCI_SUBDEVICE_ID_CHASE_PCIRAS4 0xF001
+#define PCI_SUBDEVICE_ID_CHASE_PCIRAS8 0xF010
+
+#define PCI_VENDOR_ID_AUREAL 0x12eb
+#define PCI_DEVICE_ID_AUREAL_VORTEX_1 0x0001
+#define PCI_DEVICE_ID_AUREAL_VORTEX_2 0x0002
+
+#define PCI_VENDOR_ID_CBOARDS 0x1307
+#define PCI_DEVICE_ID_CBOARDS_DAS1602_16 0x0001
+
+#define PCI_VENDOR_ID_SIIG 0x131f
+#define PCI_DEVICE_ID_SIIG_1S_10x_550 0x1000
+#define PCI_DEVICE_ID_SIIG_1S_10x_650 0x1001
+#define PCI_DEVICE_ID_SIIG_1S_10x_850 0x1002
+#define PCI_DEVICE_ID_SIIG_1S1P_10x_550 0x1010
+#define PCI_DEVICE_ID_SIIG_1S1P_10x_650 0x1011
+#define PCI_DEVICE_ID_SIIG_1S1P_10x_850 0x1012
+#define PCI_DEVICE_ID_SIIG_1P_10x 0x1020
+#define PCI_DEVICE_ID_SIIG_2P_10x 0x1021
+#define PCI_DEVICE_ID_SIIG_2S_10x_550 0x1030
+#define PCI_DEVICE_ID_SIIG_2S_10x_650 0x1031
+#define PCI_DEVICE_ID_SIIG_2S_10x_850 0x1032
+#define PCI_DEVICE_ID_SIIG_2S1P_10x_550 0x1034
+#define PCI_DEVICE_ID_SIIG_2S1P_10x_650 0x1035
+#define PCI_DEVICE_ID_SIIG_2S1P_10x_850 0x1036
+#define PCI_DEVICE_ID_SIIG_4S_10x_550 0x1050
+#define PCI_DEVICE_ID_SIIG_4S_10x_650 0x1051
+#define PCI_DEVICE_ID_SIIG_4S_10x_850 0x1052
+#define PCI_DEVICE_ID_SIIG_1S_20x_550 0x2000
+#define PCI_DEVICE_ID_SIIG_1S_20x_650 0x2001
+#define PCI_DEVICE_ID_SIIG_1S_20x_850 0x2002
+#define PCI_DEVICE_ID_SIIG_1P_20x 0x2020
+#define PCI_DEVICE_ID_SIIG_2P_20x 0x2021
+#define PCI_DEVICE_ID_SIIG_2S_20x_550 0x2030
+#define PCI_DEVICE_ID_SIIG_2S_20x_650 0x2031
+#define PCI_DEVICE_ID_SIIG_2S_20x_850 0x2032
+#define PCI_DEVICE_ID_SIIG_2P1S_20x_550 0x2040
+#define PCI_DEVICE_ID_SIIG_2P1S_20x_650 0x2041
+#define PCI_DEVICE_ID_SIIG_2P1S_20x_850 0x2042
+#define PCI_DEVICE_ID_SIIG_1S1P_20x_550 0x2010
+#define PCI_DEVICE_ID_SIIG_1S1P_20x_650 0x2011
+#define PCI_DEVICE_ID_SIIG_1S1P_20x_850 0x2012
+#define PCI_DEVICE_ID_SIIG_4S_20x_550 0x2050
+#define PCI_DEVICE_ID_SIIG_4S_20x_650 0x2051
+#define PCI_DEVICE_ID_SIIG_4S_20x_850 0x2052
+#define PCI_DEVICE_ID_SIIG_2S1P_20x_550 0x2060
+#define PCI_DEVICE_ID_SIIG_2S1P_20x_650 0x2061
+#define PCI_DEVICE_ID_SIIG_2S1P_20x_850 0x2062
+
+#define PCI_VENDOR_ID_DOMEX 0x134a
+#define PCI_DEVICE_ID_DOMEX_DMX3191D 0x0001
+
+#define PCI_VENDOR_ID_QUATECH 0x135C
+#define PCI_DEVICE_ID_QUATECH_QSC100 0x0010
+#define PCI_DEVICE_ID_QUATECH_DSC100 0x0020
+#define PCI_DEVICE_ID_QUATECH_DSC200 0x0030
+#define PCI_DEVICE_ID_QUATECH_QSC200 0x0040
+#define PCI_DEVICE_ID_QUATECH_ESC100D 0x0050
+#define PCI_DEVICE_ID_QUATECH_ESC100M 0x0060
+
+#define PCI_VENDOR_ID_SEALEVEL 0x135e
+#define PCI_DEVICE_ID_SEALEVEL_U530 0x7101
+#define PCI_DEVICE_ID_SEALEVEL_UCOMM2 0x7201
+#define PCI_DEVICE_ID_SEALEVEL_UCOMM422 0x7402
+#define PCI_DEVICE_ID_SEALEVEL_UCOMM232 0x7202
+#define PCI_DEVICE_ID_SEALEVEL_COMM4 0x7401
+#define PCI_DEVICE_ID_SEALEVEL_COMM8 0x7801
+
+#define PCI_VENDOR_ID_HYPERCOPE 0x1365
+#define PCI_DEVICE_ID_HYPERCOPE_PLX 0x9050
+#define PCI_SUBDEVICE_ID_HYPERCOPE_OLD_ERGO 0x0104
+#define PCI_SUBDEVICE_ID_HYPERCOPE_ERGO 0x0106
+#define PCI_SUBDEVICE_ID_HYPERCOPE_METRO 0x0107
+#define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2 0x0108
+#define PCI_SUBDEVICE_ID_HYPERCOPE_PLEXUS 0x0109
+
+#define PCI_VENDOR_ID_KAWASAKI 0x136b
+#define PCI_DEVICE_ID_MCHIP_KL5A72002 0xff01
+
+#define PCI_VENDOR_ID_LMC 0x1376
+#define PCI_DEVICE_ID_LMC_HSSI 0x0003
+#define PCI_DEVICE_ID_LMC_DS3 0x0004
+#define PCI_DEVICE_ID_LMC_SSI 0x0005
+#define PCI_DEVICE_ID_LMC_T1 0x0006
+
+#define PCI_VENDOR_ID_NETGEAR 0x1385
+#define PCI_DEVICE_ID_NETGEAR_GA620 0x620a
+#define PCI_DEVICE_ID_NETGEAR_GA622 0x622a
+
+#define PCI_VENDOR_ID_APPLICOM 0x1389
+#define PCI_DEVICE_ID_APPLICOM_PCIGENERIC 0x0001
+#define PCI_DEVICE_ID_APPLICOM_PCI2000IBS_CAN 0x0002
+#define PCI_DEVICE_ID_APPLICOM_PCI2000PFB 0x0003
+
+#define PCI_VENDOR_ID_MOXA 0x1393
+#define PCI_DEVICE_ID_MOXA_C104 0x1040
+#define PCI_DEVICE_ID_MOXA_C168 0x1680
+#define PCI_DEVICE_ID_MOXA_CP204J 0x2040
+#define PCI_DEVICE_ID_MOXA_C218 0x2180
+#define PCI_DEVICE_ID_MOXA_C320 0x3200
+
+#define PCI_VENDOR_ID_CCD 0x1397
+#define PCI_DEVICE_ID_CCD_2BD0 0x2bd0
+#define PCI_DEVICE_ID_CCD_B000 0xb000
+#define PCI_DEVICE_ID_CCD_B006 0xb006
+#define PCI_DEVICE_ID_CCD_B007 0xb007
+#define PCI_DEVICE_ID_CCD_B008 0xb008
+#define PCI_DEVICE_ID_CCD_B009 0xb009
+#define PCI_DEVICE_ID_CCD_B00A 0xb00a
+#define PCI_DEVICE_ID_CCD_B00B 0xb00b
+#define PCI_DEVICE_ID_CCD_B00C 0xb00c
+#define PCI_DEVICE_ID_CCD_B100 0xb100
+
+#define PCI_VENDOR_ID_3WARE 0x13C1
+#define PCI_DEVICE_ID_3WARE_1000 0x1000
+
+#define PCI_VENDOR_ID_ABOCOM 0x13D1
+#define PCI_DEVICE_ID_ABOCOM_2BD1 0x2BD1
+
+#define PCI_VENDOR_ID_CMEDIA 0x13f6
+#define PCI_DEVICE_ID_CMEDIA_CM8338A 0x0100
+#define PCI_DEVICE_ID_CMEDIA_CM8338B 0x0101
+#define PCI_DEVICE_ID_CMEDIA_CM8738 0x0111
+#define PCI_DEVICE_ID_CMEDIA_CM8738B 0x0112
+
+#define PCI_VENDOR_ID_LAVA 0x1407
+#define PCI_DEVICE_ID_LAVA_DSERIAL 0x0100 /* 2x 16550 */
+#define PCI_DEVICE_ID_LAVA_QUATRO_A 0x0101 /* 2x 16550, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_QUATRO_B 0x0102 /* 2x 16550, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_OCTO_A 0x0180 /* 4x 16550A, half of 8 port */
+#define PCI_DEVICE_ID_LAVA_OCTO_B 0x0181 /* 4x 16550A, half of 8 port */
+#define PCI_DEVICE_ID_LAVA_PORT_PLUS 0x0200 /* 2x 16650 */
+#define PCI_DEVICE_ID_LAVA_QUAD_A 0x0201 /* 2x 16650, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_QUAD_B 0x0202 /* 2x 16650, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_SSERIAL 0x0500 /* 1x 16550 */
+#define PCI_DEVICE_ID_LAVA_PORT_650 0x0600 /* 1x 16650 */
+#define PCI_DEVICE_ID_LAVA_PARALLEL 0x8000
+#define PCI_DEVICE_ID_LAVA_DUAL_PAR_A 0x8002 /* The Lava Dual Parallel is */
+#define PCI_DEVICE_ID_LAVA_DUAL_PAR_B 0x8003 /* two PCI devices on a card */
+#define PCI_DEVICE_ID_LAVA_BOCA_IOPPAR 0x8800
+
+#define PCI_VENDOR_ID_TIMEDIA 0x1409
+#define PCI_DEVICE_ID_TIMEDIA_1889 0x7168
+
+#define PCI_VENDOR_ID_OXSEMI 0x1415
+#define PCI_DEVICE_ID_OXSEMI_12PCI840 0x8403
+#define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501
+#define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511
+#define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513
+#define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521
+
+#define PCI_VENDOR_ID_AIRONET 0x14b9
+#define PCI_DEVICE_ID_AIRONET_4800_1 0x0001
+#define PCI_DEVICE_ID_AIRONET_4800 0x4500 // values switched? see
+#define PCI_DEVICE_ID_AIRONET_4500 0x4800 // drivers/net/aironet4500_card.c
+
+#define PCI_VENDOR_ID_TITAN 0x14D2
+#define PCI_DEVICE_ID_TITAN_010L 0x8001
+#define PCI_DEVICE_ID_TITAN_100L 0x8010
+#define PCI_DEVICE_ID_TITAN_110L 0x8011
+#define PCI_DEVICE_ID_TITAN_200L 0x8020
+#define PCI_DEVICE_ID_TITAN_210L 0x8021
+#define PCI_DEVICE_ID_TITAN_400L 0x8040
+#define PCI_DEVICE_ID_TITAN_800L 0x8080
+#define PCI_DEVICE_ID_TITAN_100 0xA001
+#define PCI_DEVICE_ID_TITAN_200 0xA005
+#define PCI_DEVICE_ID_TITAN_400 0xA003
+#define PCI_DEVICE_ID_TITAN_800B 0xA004
+
+#define PCI_VENDOR_ID_PANACOM 0x14d4
+#define PCI_DEVICE_ID_PANACOM_QUADMODEM 0x0400
+#define PCI_DEVICE_ID_PANACOM_DUALMODEM 0x0402
+
+#define PCI_VENDOR_ID_AFAVLAB 0x14db
+#define PCI_DEVICE_ID_AFAVLAB_P028 0x2180
+
+#define PCI_VENDOR_ID_BROADCOM 0x14e4
+#define PCI_DEVICE_ID_TIGON3_5700 0x1644
+#define PCI_DEVICE_ID_TIGON3_5701 0x1645
+#define PCI_DEVICE_ID_TIGON3_5702 0x1646
+#define PCI_DEVICE_ID_TIGON3_5703 0x1647
+#define PCI_DEVICE_ID_TIGON3_5704 0x1648
+#define PCI_DEVICE_ID_TIGON3_5702FE 0x164d
+#define PCI_DEVICE_ID_TIGON3_5702X 0x16a6
+#define PCI_DEVICE_ID_TIGON3_5703X 0x16a7
+
+#define PCI_VENDOR_ID_SYBA 0x1592
+#define PCI_DEVICE_ID_SYBA_2P_EPP 0x0782
+#define PCI_DEVICE_ID_SYBA_1P_ECP 0x0783
+
+#define PCI_VENDOR_ID_MORETON 0x15aa
+#define PCI_DEVICE_ID_RASTEL_2PORT 0x2000
+
+#define PCI_VENDOR_ID_ZOLTRIX 0x15b0
+#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0
+
+#define PCI_VENDOR_ID_PDC 0x15e9
+#define PCI_DEVICE_ID_PDC_1841 0x1841
+
+#define PCI_VENDOR_ID_ALTIMA 0x173b
+#define PCI_DEVICE_ID_ALTIMA_AC1000 0x03e8
+#define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea
+
+#define PCI_VENDOR_ID_SYMPHONY 0x1c1c
+#define PCI_DEVICE_ID_SYMPHONY_101 0x0001
+
+#define PCI_VENDOR_ID_TEKRAM 0x1de1
+#define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29
+
+#define PCI_VENDOR_ID_HINT 0x3388
+#define PCI_DEVICE_ID_HINT_VXPROII_IDE 0x8013
+
+#define PCI_VENDOR_ID_3DLABS 0x3d3d
+#define PCI_DEVICE_ID_3DLABS_300SX 0x0001
+#define PCI_DEVICE_ID_3DLABS_500TX 0x0002
+#define PCI_DEVICE_ID_3DLABS_DELTA 0x0003
+#define PCI_DEVICE_ID_3DLABS_PERMEDIA 0x0004
+#define PCI_DEVICE_ID_3DLABS_MX 0x0006
+#define PCI_DEVICE_ID_3DLABS_PERMEDIA2 0x0007
+#define PCI_DEVICE_ID_3DLABS_GAMMA 0x0008
+#define PCI_DEVICE_ID_3DLABS_PERMEDIA2V 0x0009
+
+#define PCI_VENDOR_ID_AVANCE 0x4005
+#define PCI_DEVICE_ID_AVANCE_ALG2064 0x2064
+#define PCI_DEVICE_ID_AVANCE_2302 0x2302
+
+#define PCI_VENDOR_ID_AKS 0x416c
+#define PCI_DEVICE_ID_AKS_ALADDINCARD 0x0100
+#define PCI_DEVICE_ID_AKS_CPC 0x0200
+
+#define PCI_VENDOR_ID_NETVIN 0x4a14
+#define PCI_DEVICE_ID_NETVIN_NV5000SC 0x5000
+
+#define PCI_VENDOR_ID_S3 0x5333
+#define PCI_DEVICE_ID_S3_PLATO_PXS 0x0551
+#define PCI_DEVICE_ID_S3_ViRGE 0x5631
+#define PCI_DEVICE_ID_S3_TRIO 0x8811
+#define PCI_DEVICE_ID_S3_AURORA64VP 0x8812
+#define PCI_DEVICE_ID_S3_TRIO64UVP 0x8814
+#define PCI_DEVICE_ID_S3_ViRGE_VX 0x883d
+#define PCI_DEVICE_ID_S3_868 0x8880
+#define PCI_DEVICE_ID_S3_928 0x88b0
+#define PCI_DEVICE_ID_S3_864_1 0x88c0
+#define PCI_DEVICE_ID_S3_864_2 0x88c1
+#define PCI_DEVICE_ID_S3_964_1 0x88d0
+#define PCI_DEVICE_ID_S3_964_2 0x88d1
+#define PCI_DEVICE_ID_S3_968 0x88f0
+#define PCI_DEVICE_ID_S3_TRIO64V2 0x8901
+#define PCI_DEVICE_ID_S3_PLATO_PXG 0x8902
+#define PCI_DEVICE_ID_S3_ViRGE_DXGX 0x8a01
+#define PCI_DEVICE_ID_S3_ViRGE_GX2 0x8a10
+#define PCI_DEVICE_ID_S3_ViRGE_MX 0x8c01
+#define PCI_DEVICE_ID_S3_ViRGE_MXP 0x8c02
+#define PCI_DEVICE_ID_S3_ViRGE_MXPMV 0x8c03
+#define PCI_DEVICE_ID_S3_SONICVIBES 0xca00
+
+#define PCI_VENDOR_ID_DUNORD 0x5544
+#define PCI_DEVICE_ID_DUNORD_I3000 0x0001
+#define PCI_VENDOR_ID_GENROCO 0x5555
+#define PCI_DEVICE_ID_GENROCO_HFP832 0x0003
+
+#define PCI_VENDOR_ID_DCI 0x6666
+#define PCI_DEVICE_ID_DCI_PCCOM4 0x0001
+#define PCI_DEVICE_ID_DCI_PCCOM8 0x0002
+
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_DEVICE_ID_INTEL_21145 0x0039
+#define PCI_DEVICE_ID_INTEL_82375 0x0482
+#define PCI_DEVICE_ID_INTEL_82424 0x0483
+#define PCI_DEVICE_ID_INTEL_82378 0x0484
+#define PCI_DEVICE_ID_INTEL_82430 0x0486
+#define PCI_DEVICE_ID_INTEL_82434 0x04a3
+#define PCI_DEVICE_ID_INTEL_I960 0x0960
+#define PCI_DEVICE_ID_INTEL_I960RM 0x0962
+#define PCI_DEVICE_ID_INTEL_82562ET 0x1031
+
+#define PCI_DEVICE_ID_INTEL_82815_MC 0x1130
+
+#define PCI_DEVICE_ID_INTEL_82559ER 0x1209
+#define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221
+#define PCI_DEVICE_ID_INTEL_82092AA_1 0x1222
+#define PCI_DEVICE_ID_INTEL_7116 0x1223
+#define PCI_DEVICE_ID_INTEL_82596 0x1226
+#define PCI_DEVICE_ID_INTEL_82865 0x1227
+#define PCI_DEVICE_ID_INTEL_82557 0x1229
+#define PCI_DEVICE_ID_INTEL_82437 0x122d
+#define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e
+#define PCI_DEVICE_ID_INTEL_82371FB_1 0x1230
+#define PCI_DEVICE_ID_INTEL_82371MX 0x1234
+#define PCI_DEVICE_ID_INTEL_82437MX 0x1235
+#define PCI_DEVICE_ID_INTEL_82441 0x1237
+#define PCI_DEVICE_ID_INTEL_82380FB 0x124b
+#define PCI_DEVICE_ID_INTEL_82439 0x1250
+#define PCI_DEVICE_ID_INTEL_80960_RP 0x1960
+#define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000
+#define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010
+#define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020
+#define PCI_DEVICE_ID_INTEL_82437VX 0x7030
+#define PCI_DEVICE_ID_INTEL_82439TX 0x7100
+#define PCI_DEVICE_ID_INTEL_82371AB_0 0x7110
+#define PCI_DEVICE_ID_INTEL_82371AB 0x7111
+#define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112
+#define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113
+#define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410
+#define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411
+#define PCI_DEVICE_ID_INTEL_82801AA_2 0x2412
+#define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413
+#define PCI_DEVICE_ID_INTEL_82801AA_5 0x2415
+#define PCI_DEVICE_ID_INTEL_82801AA_6 0x2416
+#define PCI_DEVICE_ID_INTEL_82801AA_8 0x2418
+#define PCI_DEVICE_ID_INTEL_82801AB_0 0x2420
+#define PCI_DEVICE_ID_INTEL_82801AB_1 0x2421
+#define PCI_DEVICE_ID_INTEL_82801AB_2 0x2422
+#define PCI_DEVICE_ID_INTEL_82801AB_3 0x2423
+#define PCI_DEVICE_ID_INTEL_82801AB_5 0x2425
+#define PCI_DEVICE_ID_INTEL_82801AB_6 0x2426
+#define PCI_DEVICE_ID_INTEL_82801AB_8 0x2428
+#define PCI_DEVICE_ID_INTEL_82801BA_0 0x2440
+#define PCI_DEVICE_ID_INTEL_82801BA_1 0x2442
+#define PCI_DEVICE_ID_INTEL_82801BA_2 0x2443
+#define PCI_DEVICE_ID_INTEL_82801BA_3 0x2444
+#define PCI_DEVICE_ID_INTEL_82801BA_4 0x2445
+#define PCI_DEVICE_ID_INTEL_82801BA_5 0x2446
+#define PCI_DEVICE_ID_INTEL_82801BA_6 0x2448
+#define PCI_DEVICE_ID_INTEL_82801BA_7 0x2449
+#define PCI_DEVICE_ID_INTEL_82801BA_8 0x244a
+#define PCI_DEVICE_ID_INTEL_82801BA_9 0x244b
+#define PCI_DEVICE_ID_INTEL_82801BA_10 0x244c
+#define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e
+#define PCI_DEVICE_ID_INTEL_82801E_0 0x2450
+#define PCI_DEVICE_ID_INTEL_82801E_2 0x2452
+#define PCI_DEVICE_ID_INTEL_82801E_3 0x2453
+#define PCI_DEVICE_ID_INTEL_82801E_9 0x2459
+#define PCI_DEVICE_ID_INTEL_82801E_11 0x245B
+#define PCI_DEVICE_ID_INTEL_82801E_14 0x245D
+#define PCI_DEVICE_ID_INTEL_82801E_15 0x245E
+#define PCI_DEVICE_ID_INTEL_82801CA_0 0x2480
+#define PCI_DEVICE_ID_INTEL_82801CA_2 0x2482
+#define PCI_DEVICE_ID_INTEL_82801CA_3 0x2483
+#define PCI_DEVICE_ID_INTEL_82801CA_4 0x2484
+#define PCI_DEVICE_ID_INTEL_82801CA_5 0x2485
+#define PCI_DEVICE_ID_INTEL_82801CA_6 0x2486
+#define PCI_DEVICE_ID_INTEL_82801CA_7 0x2487
+#define PCI_DEVICE_ID_INTEL_82801CA_10 0x248a
+#define PCI_DEVICE_ID_INTEL_82801CA_11 0x248b
+#define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c
+#define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0
+#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2
+#define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3
+#define PCI_DEVICE_ID_INTEL_82801DB_4 0x24c4
+#define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5
+#define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6
+#define PCI_DEVICE_ID_INTEL_82801DB_7 0x24c7
+#define PCI_DEVICE_ID_INTEL_82801DB_11 0x24cb
+#define PCI_DEVICE_ID_INTEL_82801DB_13 0x24cd
+#define PCI_DEVICE_ID_INTEL_80310 0x530d
+#define PCI_DEVICE_ID_INTEL_82810_MC1 0x7120
+#define PCI_DEVICE_ID_INTEL_82810_IG1 0x7121
+#define PCI_DEVICE_ID_INTEL_82810_MC3 0x7122
+#define PCI_DEVICE_ID_INTEL_82810_IG3 0x7123
+#define PCI_DEVICE_ID_INTEL_82443LX_0 0x7180
+#define PCI_DEVICE_ID_INTEL_82443LX_1 0x7181
+#define PCI_DEVICE_ID_INTEL_82443BX_0 0x7190
+#define PCI_DEVICE_ID_INTEL_82443BX_1 0x7191
+#define PCI_DEVICE_ID_INTEL_82443BX_2 0x7192
+#define PCI_DEVICE_ID_INTEL_82443MX_0 0x7198
+#define PCI_DEVICE_ID_INTEL_82443MX_1 0x7199
+#define PCI_DEVICE_ID_INTEL_82443MX_2 0x719a
+#define PCI_DEVICE_ID_INTEL_82443MX_3 0x719b
+#define PCI_DEVICE_ID_INTEL_82372FB_0 0x7600
+#define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601
+#define PCI_DEVICE_ID_INTEL_82372FB_2 0x7602
+#define PCI_DEVICE_ID_INTEL_82372FB_3 0x7603
+#define PCI_DEVICE_ID_INTEL_82454GX 0x84c4
+#define PCI_DEVICE_ID_INTEL_82450GX 0x84c5
+#define PCI_DEVICE_ID_INTEL_82451NX 0x84ca
+
+#define PCI_VENDOR_ID_COMPUTONE 0x8e0e
+#define PCI_DEVICE_ID_COMPUTONE_IP2EX 0x0291
+#define PCI_DEVICE_ID_COMPUTONE_PG 0x0302
+#define PCI_SUBVENDOR_ID_COMPUTONE 0x8e0e
+#define PCI_SUBDEVICE_ID_COMPUTONE_PG4 0x0001
+#define PCI_SUBDEVICE_ID_COMPUTONE_PG8 0x0002
+#define PCI_SUBDEVICE_ID_COMPUTONE_PG6 0x0003
+
+#define PCI_VENDOR_ID_KTI 0x8e2e
+#define PCI_DEVICE_ID_KTI_ET32P2 0x3000
+
+#define PCI_VENDOR_ID_ADAPTEC 0x9004
+#define PCI_DEVICE_ID_ADAPTEC_7810 0x1078
+#define PCI_DEVICE_ID_ADAPTEC_7821 0x2178
+#define PCI_DEVICE_ID_ADAPTEC_38602 0x3860
+#define PCI_DEVICE_ID_ADAPTEC_7850 0x5078
+#define PCI_DEVICE_ID_ADAPTEC_7855 0x5578
+#define PCI_DEVICE_ID_ADAPTEC_5800 0x5800
+#define PCI_DEVICE_ID_ADAPTEC_3860 0x6038
+#define PCI_DEVICE_ID_ADAPTEC_1480A 0x6075
+#define PCI_DEVICE_ID_ADAPTEC_7860 0x6078
+#define PCI_DEVICE_ID_ADAPTEC_7861 0x6178
+#define PCI_DEVICE_ID_ADAPTEC_7870 0x7078
+#define PCI_DEVICE_ID_ADAPTEC_7871 0x7178
+#define PCI_DEVICE_ID_ADAPTEC_7872 0x7278
+#define PCI_DEVICE_ID_ADAPTEC_7873 0x7378
+#define PCI_DEVICE_ID_ADAPTEC_7874 0x7478
+#define PCI_DEVICE_ID_ADAPTEC_7895 0x7895
+#define PCI_DEVICE_ID_ADAPTEC_7880 0x8078
+#define PCI_DEVICE_ID_ADAPTEC_7881 0x8178
+#define PCI_DEVICE_ID_ADAPTEC_7882 0x8278
+#define PCI_DEVICE_ID_ADAPTEC_7883 0x8378
+#define PCI_DEVICE_ID_ADAPTEC_7884 0x8478
+#define PCI_DEVICE_ID_ADAPTEC_7885 0x8578
+#define PCI_DEVICE_ID_ADAPTEC_7886 0x8678
+#define PCI_DEVICE_ID_ADAPTEC_7887 0x8778
+#define PCI_DEVICE_ID_ADAPTEC_7888 0x8878
+#define PCI_DEVICE_ID_ADAPTEC_1030 0x8b78
+
+#define PCI_VENDOR_ID_ADAPTEC2 0x9005
+#define PCI_DEVICE_ID_ADAPTEC2_2940U2 0x0010
+#define PCI_DEVICE_ID_ADAPTEC2_2930U2 0x0011
+#define PCI_DEVICE_ID_ADAPTEC2_7890B 0x0013
+#define PCI_DEVICE_ID_ADAPTEC2_7890 0x001f
+#define PCI_DEVICE_ID_ADAPTEC2_3940U2 0x0050
+#define PCI_DEVICE_ID_ADAPTEC2_3950U2D 0x0051
+#define PCI_DEVICE_ID_ADAPTEC2_7896 0x005f
+#define PCI_DEVICE_ID_ADAPTEC2_7892A 0x0080
+#define PCI_DEVICE_ID_ADAPTEC2_7892B 0x0081
+#define PCI_DEVICE_ID_ADAPTEC2_7892D 0x0083
+#define PCI_DEVICE_ID_ADAPTEC2_7892P 0x008f
+#define PCI_DEVICE_ID_ADAPTEC2_7899A 0x00c0
+#define PCI_DEVICE_ID_ADAPTEC2_7899B 0x00c1
+#define PCI_DEVICE_ID_ADAPTEC2_7899D 0x00c3
+#define PCI_DEVICE_ID_ADAPTEC2_7899P 0x00cf
+
+#define PCI_VENDOR_ID_ATRONICS 0x907f
+#define PCI_DEVICE_ID_ATRONICS_2015 0x2015
+
+#define PCI_VENDOR_ID_HOLTEK 0x9412
+#define PCI_DEVICE_ID_HOLTEK_6565 0x6565
+
+#define PCI_VENDOR_ID_NETMOS 0x9710
+#define PCI_DEVICE_ID_NETMOS_9735 0x9735
+#define PCI_DEVICE_ID_NETMOS_9835 0x9835
+
+#define PCI_SUBVENDOR_ID_EXSYS 0xd84d
+#define PCI_SUBDEVICE_ID_EXSYS_4014 0x4014
+
+#define PCI_VENDOR_ID_TIGERJET 0xe159
+#define PCI_DEVICE_ID_TIGERJET_300 0x0001
+#define PCI_DEVICE_ID_TIGERJET_100 0x0002
+
+#define PCI_VENDOR_ID_ARK 0xedd8
+#define PCI_DEVICE_ID_ARK_STING 0xa091
+#define PCI_DEVICE_ID_ARK_STINGARK 0xa099
+#define PCI_DEVICE_ID_ARK_2000MT 0xa0a1
+
+#define PCI_VENDOR_ID_MICROGATE 0x13c0
+#define PCI_DEVICE_ID_MICROGATE_USC 0x0010
+#define PCI_DEVICE_ID_MICROGATE_SCC 0x0020
+#define PCI_DEVICE_ID_MICROGATE_SCA 0x0030
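[Editor's note: the hunk above is purely a lookup table; drivers consume these constants when declaring which hardware they bind to. A minimal sketch of that use, assuming the Linux 2.4-style struct pci_device_id and PCI_ANY_ID from <linux/pci.h> -- the driver table name is illustrative and not part of this changeset:

    /* Hypothetical match table: bind only to the 3Com 3c905B-TX,
     * accepting any subsystem vendor/device IDs. */
    static struct pci_device_id hypothetical_tbl[] = {
        { PCI_VENDOR_ID_3COM, PCI_DEVICE_ID_3COM_3C905B_TX,
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
        { 0, }   /* terminating entry */
    };
]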
diff --git a/xen/include/xeno/perfc.h b/xen/include/xeno/perfc.h
new file mode 100644
index 0000000000..31201eaa6d
--- /dev/null
+++ b/xen/include/xeno/perfc.h
@@ -0,0 +1,43 @@
+/*
+ * xen performance counters
+ */
+
+/*
+ * NOTE: new counters must be defined in perfc_defn.h
+ *
+ * PERFCOUNTER (counter, string) define a new performance counter
+ * PERFCOUNTER_ARRAY (counter, string, size) define an array of counters
+ *
+ * unsigned long perf_value (counter) get value of a counter
+ * unsigned long perf_valuea (counter, index) get value of an array counter
+ * void perf_incr (counter) increment a counter
+ * void perf_incra (counter, index) increment an array counter
+ * void perf_add (counter, value) add a value to a counter
+ * void perf_adda (counter, index, value) add a value to array counter
+ * void perf_print (counter) print out the counter
+ */
+
+#define PERFCOUNTER( var, name ) \
+unsigned long var[1];
+#define PERFCOUNTER_ARRAY( var, name, size ) \
+unsigned long var[size];
+
+struct perfcounter_t
+{
+#include <xeno/perfc_defn.h>
+};
+
+extern struct perfcounter_t perfcounters;
+extern char *perfc_name[];
+
+#define perf_value(x) perfcounters.x[0]
+#define perf_valuea(x,y) perfcounters.x[y]
+#define perf_incr(x) perfcounters.x[0]++
+#define perf_incra(x,y) perfcounters.x[y]++
+#define perf_add(x,y) perfcounters.x[0]+=(y)
+#define perf_adda(x,y,z) perfcounters.x[y]+=(z)
+
+#define perf_print(x) \
+ __perfc_print(perfcounters.x, \
+ &perfcounters.x[0] - ((unsigned long *)&perfcounters))
+
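+
+/*
+ * Illustrative sketch (counter name hypothetical): a counter is declared
+ * once in perfc_defn.h and then driven through the accessors above, e.g.
+ *
+ *   PERFCOUNTER( net_tx, "net: packets transmitted" )    [in perfc_defn.h]
+ *
+ *   perf_incr(net_tx);                          increment by one
+ *   perf_add(net_tx, nr_pkts);                  add a whole batch
+ *   unsigned long seen = perf_value(net_tx);    read the current total
+ */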
diff --git a/xen/include/xeno/perfc_defn.h b/xen/include/xeno/perfc_defn.h
new file mode 100644
index 0000000000..16ab4cd8c9
--- /dev/null
+++ b/xen/include/xeno/perfc_defn.h
@@ -0,0 +1,4 @@
+
+PERFCOUNTER( blockio_tx, "block io: messages received from tx queue" )
+PERFCOUNTER( blockio_rx, "block io: messages sent on rx queue" )
+
diff --git a/xen/include/xeno/prefetch.h b/xen/include/xeno/prefetch.h
new file mode 100644
index 0000000000..8d7d3ffeb4
--- /dev/null
+++ b/xen/include/xeno/prefetch.h
@@ -0,0 +1,60 @@
+/*
+ * Generic cache management functions. Everything is arch-specific,
+ * but this header exists to make sure the defines/functions can be
+ * used in a generic way.
+ *
+ * 2000-11-13 Arjan van de Ven <arjan@fenrus.demon.nl>
+ *
+ */
+
+#ifndef _LINUX_PREFETCH_H
+#define _LINUX_PREFETCH_H
+
+#include <asm/processor.h>
+#include <asm/cache.h>
+
+/*
+ prefetch(x) attempts to pre-emptively get the memory pointed to
+ by address "x" into the CPU L1 cache.
+   prefetch(x) should not cause any kind of exception; prefetch(0) is
+   specifically OK.
+
+ prefetch() should be defined by the architecture, if not, the
+ #define below provides a no-op define.
+
+ There are 3 prefetch() macros:
+
+ prefetch(x) - prefetches the cacheline at "x" for read
+ prefetchw(x) - prefetches the cacheline at "x" for write
+    spin_lock_prefetch(x) - prefetches the spinlock *x for taking
+
+    There is also PREFETCH_STRIDE, which is the architecture-preferred
+    "lookahead" size for prefetching streamed operations.
+
+*/
+
+/*
+ * These cannot be do{}while(0) macros. See the mental gymnastics in
+ * the loop macro.
+ */
+
+#ifndef ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_PREFETCHW
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_SPINLOCK_PREFETCH
+#define ARCH_HAS_SPINLOCK_PREFETCH
+#define spin_lock_prefetch(x) prefetchw(x)
+#endif
+
+#ifndef PREFETCH_STRIDE
+#define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
+#endif
+
+#endif
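+
+/*
+ * Illustrative sketch (assumed usage, not part of the original header):
+ * when streaming through a large buffer, prefetch one arch-tuned stride
+ * ahead; over-running the end is harmless since prefetch() never faults.
+ *
+ *   unsigned long i, total = 0;
+ *   for ( i = 0; i < len; i++ ) {
+ *       prefetch(&buf[i + PREFETCH_STRIDE]);
+ *       total += buf[i];
+ *   }
+ */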
diff --git a/xen/include/xeno/reboot.h b/xen/include/xeno/reboot.h
new file mode 100644
index 0000000000..5f128a9525
--- /dev/null
+++ b/xen/include/xeno/reboot.h
@@ -0,0 +1,51 @@
+#ifndef _LINUX_REBOOT_H
+#define _LINUX_REBOOT_H
+
+/*
+ * Magic values required to use _reboot() system call.
+ */
+
+#define LINUX_REBOOT_MAGIC1 0xfee1dead
+#define LINUX_REBOOT_MAGIC2 672274793
+#define LINUX_REBOOT_MAGIC2A 85072278
+#define LINUX_REBOOT_MAGIC2B 369367448
+
+
+/*
+ * Commands accepted by the _reboot() system call.
+ *
+ * RESTART Restart system using default command and mode.
+ * HALT Stop OS and give system control to ROM monitor, if any.
+ * CAD_ON Ctrl-Alt-Del sequence causes RESTART command.
+ * CAD_OFF Ctrl-Alt-Del sequence sends SIGINT to init task.
+ * POWER_OFF Stop OS and remove all power from system, if possible.
+ * RESTART2 Restart system using given command string.
+ */
+
+#define LINUX_REBOOT_CMD_RESTART 0x01234567
+#define LINUX_REBOOT_CMD_HALT 0xCDEF0123
+#define LINUX_REBOOT_CMD_CAD_ON 0x89ABCDEF
+#define LINUX_REBOOT_CMD_CAD_OFF 0x00000000
+#define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC
+#define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
+
+
+#ifdef __KERNEL__
+
+#include <linux/notifier.h>
+
+extern int register_reboot_notifier(struct notifier_block *);
+extern int unregister_reboot_notifier(struct notifier_block *);
+
+
+/*
+ * Architecture-specific implementations of sys_reboot commands.
+ */
+
+extern void machine_restart(char *cmd);
+extern void machine_halt(void);
+extern void machine_power_off(void);
+
+#endif
+
+#endif /* _LINUX_REBOOT_H */
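+
+/*
+ * Illustrative sketch (exact syscall signature assumed, not defined in
+ * this header): a caller must present both magic numbers plus a command,
+ * so a stray call cannot reset the machine by accident, e.g.
+ *
+ *   _reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
+ *           LINUX_REBOOT_CMD_POWER_OFF);
+ */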
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
new file mode 100644
index 0000000000..3cffa46bf1
--- /dev/null
+++ b/xen/include/xeno/sched.h
@@ -0,0 +1,224 @@
+#ifndef _LINUX_SCHED_H
+#define _LINUX_SCHED_H
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/spinlock.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <xeno/smp.h>
+#include <asm/processor.h>
+#include <asm/current.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <xeno/dom0_ops.h>
+
+extern unsigned long volatile jiffies;
+extern rwlock_t tasklist_lock;
+
+#include <xeno/spinlock.h>
+
+struct mm_struct {
+ unsigned long cpu_vm_mask;
+ /*
+     * Every domain has an L1 pagetable of its own. Per-domain mappings
+ * are put in this table (eg. the current GDT is mapped here).
+ */
+ l2_pgentry_t *perdomain_pt;
+ pagetable_t pagetable;
+ /* Current LDT selector. */
+ unsigned int ldt_sel;
+ /* Next entry is passed to LGDT on domain switch. */
+ char gdt[6];
+};
+
+/* Convenient accessor for mm.gdt. */
+#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e))
+#define SET_GDT_ADDRESS(_p, _a) ((*(u32 *)((_p)->mm.gdt + 2)) = (_a))
+#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0)))
+#define GET_GDT_ADDRESS(_p) ((*(u32 *)((_p)->mm.gdt + 2)))
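+
+/*
+ * Illustrative sketch (names hypothetical): gdt[] is laid out as the
+ * 6-byte pseudo-descriptor LGDT expects -- a u16 limit followed by a
+ * u32 linear base -- so a domain-switch path might prime it with:
+ *
+ *   SET_GDT_ENTRIES(p, (nr_entries * 8) - 1);    limit field at gdt+0
+ *   SET_GDT_ADDRESS(p, gdt_virt_base);           base field at gdt+2
+ */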
+
+extern struct mm_struct init_mm;
+#define IDLE0_MM \
+{ \
+ cpu_vm_mask: 0, \
+ perdomain_pt: 0, \
+ pagetable: mk_pagetable(__pa(idle0_pg_table)) \
+}
+
+#define _HYP_EVENT_NEED_RESCHED 0
+#define _HYP_EVENT_NET_RX 1
+#define _HYP_EVENT_DIE 2
+
+#define PF_DONEFPUINIT 0x1 /* Has the FPU been initialised for this task? */
+#define PF_USEDFPU 0x2 /* Has this task used the FPU since last save? */
+#define PF_GUEST_STTS 0x4 /* Has the guest OS requested 'stts'? */
+
+#include <xeno/vif.h>
+#include <xeno/block.h>
+
+struct task_struct {
+
+ int processor;
+ int state;
+ int hyp_events;
+ unsigned int domain;
+
+ /* An unsafe pointer into a shared data area. */
+ shared_info_t *shared_info;
+
+ struct list_head pg_head;
+ unsigned int tot_pages;
+
+ /* Network I/O */
+ net_ring_t *net_ring_base;
+ net_vif_t *net_vif_list[MAX_GUEST_VIFS];
+ int num_net_vifs;
+
+ /* Block I/O */
+ blk_ring_t *blk_ring_base;
+ unsigned int blk_req_cons; /* request consumer */
+ struct list_head blkdev_list;
+ spinlock_t blk_ring_lock;
+
+ int has_cpu, policy, counter;
+
+ struct list_head run_list;
+
+ struct mm_struct mm;
+
+ mm_segment_t addr_limit; /* thread address space:
+                                0-0xBFFFFFFF for user-thread
+ 0-0xFFFFFFFF for kernel-thread
+ */
+
+ /*
+ * active_mm stays for now. It's entangled in the tricky TLB flushing
+ * stuff which I haven't addressed yet. It stays until I'm man enough
+ * to venture in.
+ */
+ struct mm_struct *active_mm;
+ struct thread_struct thread;
+ struct task_struct *prev_task, *next_task;
+
+ unsigned long flags;
+};
+
+/*
+ * domain states
+ * TASK_RUNNING: Domain is runnable and should be on a run queue
+ * TASK_INTERRUPTIBLE: Domain is blocked but may be woken up by an event
+ * or an expiring timer
+ * TASK_UNINTERRUPTIBLE: Domain is blocked but may not be woken up by an
+ * arbitrary event or timer.
+ * TASK_WAIT: Domain's CPU allocation has expired.
+ * TASK_STOPPED: not really used in Xen
+ * TASK_DYING: Domain is about to cross over to the land of the dead.
+ */
+
+#define TASK_RUNNING 0
+#define TASK_INTERRUPTIBLE 1
+#define TASK_UNINTERRUPTIBLE 2
+#define TASK_WAIT 4
+#define TASK_DYING 16
+/* #define TASK_STOPPED 8 not really used */
+
+#define SCHED_YIELD 0x10
+
+#include <asm/uaccess.h> /* for KERNEL_DS */
+
+#define IDLE0_TASK(_t) \
+{ \
+ processor: 0, \
+ domain: IDLE_DOMAIN_ID, \
+ state: TASK_RUNNING, \
+ has_cpu: 0, \
+ mm: IDLE0_MM, \
+ addr_limit: KERNEL_DS, \
+ active_mm: &idle0_task.mm, \
+ thread: INIT_THREAD, \
+ prev_task: &(_t), \
+ next_task: &(_t) \
+}
+
+#define IDLE_DOMAIN_ID (~0)
+#define is_idle_task(_p) ((_p)->domain == IDLE_DOMAIN_ID)
+
+#ifndef IDLE0_TASK_SIZE
+#define IDLE0_TASK_SIZE 2048*sizeof(long)
+#endif
+
+union task_union {
+ struct task_struct task;
+ unsigned long stack[IDLE0_TASK_SIZE/sizeof(long)];
+};
+
+extern union task_union idle0_task_union;
+extern struct task_struct first_task_struct;
+
+extern struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu);
+extern int setup_guestos(struct task_struct *p, dom0_newdomain_t *params);
+extern int final_setup_guestos(struct task_struct *p, dom_meminfo_t *);
+
+struct task_struct *find_domain_by_id(unsigned int dom);
+extern void release_task(struct task_struct *);
+extern void kill_domain(void);
+extern void kill_domain_with_errmsg(const char *err);
+extern long kill_other_domain(unsigned int dom);
+
+/* arch/process.c */
+void new_thread(struct task_struct *p,
+ unsigned long start_pc,
+ unsigned long start_stack,
+ unsigned long start_info);
+extern void flush_thread(void);
+extern void exit_thread(void);
+
+/* Linux puts these here for some reason! */
+extern int request_irq(unsigned int,
+ void (*handler)(int, void *, struct pt_regs *),
+ unsigned long, const char *, void *);
+extern void free_irq(unsigned int, void *);
+
+extern unsigned long wait_init_idle;
+#define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
+
+
+
+/*
+ * Scheduler functions (in schedule.c)
+ */
+#define set_current_state(_s) do { current->state = (_s); } while (0)
+#define MAX_SCHEDULE_TIMEOUT LONG_MAX
+void scheduler_init(void);
+void schedulers_start(void);
+void sched_add_domain(struct task_struct *p);
+void sched_rem_domain(struct task_struct *p);
+int wake_up(struct task_struct *p);
+long schedule_timeout(long timeout);
+long do_yield(void);
+void reschedule(struct task_struct *p);
+asmlinkage void schedule(void);
+
+
+#define signal_pending(_p) ((_p)->hyp_events || \
+ (_p)->shared_info->events)
+
+void domain_init(void);
+
+void cpu_idle(void);
+
+#define REMOVE_LINKS(p) do { \
+ (p)->next_task->prev_task = (p)->prev_task; \
+ (p)->prev_task->next_task = (p)->next_task; \
+ } while (0)
+
+#define SET_LINKS(p) do { \
+ (p)->next_task = &idle0_task; \
+ (p)->prev_task = idle0_task.prev_task; \
+ idle0_task.prev_task->next_task = (p); \
+ idle0_task.prev_task = (p); \
+ } while (0)
+
+extern void update_process_times(int user);
+
+#endif
diff --git a/xen/include/xeno/skbuff.h b/xen/include/xeno/skbuff.h
new file mode 100644
index 0000000000..519328a679
--- /dev/null
+++ b/xen/include/xeno/skbuff.h
@@ -0,0 +1,434 @@
+/*
+ * Definitions for the 'struct sk_buff' memory handlers.
+ *
+ * Authors:
+ * Alan Cox, <gw4pts@gw4pts.ampr.org>
+ * Florian La Roche, <rzsfl@rz.uni-sb.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_SKBUFF_H
+#define _LINUX_SKBUFF_H
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/types.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+
+/* vif special values */
+#define VIF_PHYSICAL_INTERFACE -1
+#define VIF_UNKNOWN_INTERFACE -2
+#define VIF_DROP -3
+#define VIF_ANY_INTERFACE -4
+
+/* skb_type values */
+#define SKB_NORMAL 0 /* A Linux-style skbuff: no strangeness */
+#define SKB_ZERO_COPY 1 /* Zero copy skbs are used for receive */
+#define SKB_NODATA 2 /* Data allocation not handled by us */
+
+#define HAVE_ALLOC_SKB /* For the drivers to know */
+#define HAVE_ALIGNABLE_SKB /* Ditto 8) */
+#define SLAB_SKB /* Slabified skbuffs */
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_HW 1
+#define CHECKSUM_UNNECESSARY 2
+
+#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
+
+/* A. Checksumming of received packets by device.
+ *
+ * NONE: device failed to checksum this packet.
+ * skb->csum is undefined.
+ *
+ * UNNECESSARY: device parsed the packet and verified the checksum.
+ *    skb->csum is undefined.
+ *    It is a bad option but, unfortunately, many vendors do this,
+ *    apparently with the secret goal of selling you a new device
+ *    when you add a new protocol to your host, e.g. IPv6. 8)
+ *
+ * HW: the most generic way. Device supplied checksum of _all_
+ * the packet as seen by netif_rx in skb->csum.
+ * NOTE: Even if device supports only some protocols, but
+ * is able to produce some skb->csum, it MUST use HW,
+ * not UNNECESSARY.
+ *
+ * B. Checksumming on output.
+ *
+ * NONE: skb is checksummed by protocol or csum is not required.
+ *
+ * HW: device is required to csum packet as seen by hard_start_xmit
+ * from skb->h.raw to the end and to record the checksum
+ * at skb->h.raw+skb->csum.
+ *
+ * Device must show its capabilities in dev->features, set
+ * at device setup time.
+ *    NETIF_F_HW_CSUM - it is a clever device; it is able to checksum
+ *                      everything.
+ *    NETIF_F_NO_CSUM - loopback or reliable single hop media.
+ *    NETIF_F_IP_CSUM - device is dumb. It is able to csum only
+ *                      TCP/UDP over IPv4. Sigh. Vendors like this
+ *                      way for an unknown reason. Though, see the comment
+ *                      above about CHECKSUM_UNNECESSARY. 8)
+ */
+
+#ifdef __i386__
+#define NET_CALLER(arg) (*(((void**)&arg)-1))
+#else
+#define NET_CALLER(arg) __builtin_return_address(0)
+#endif
+
+struct sk_buff_head {
+ /* These two members must be first. */
+ struct sk_buff * next;
+ struct sk_buff * prev;
+
+ __u32 qlen;
+ spinlock_t lock;
+};
+
+#define MAX_SKB_FRAGS 1 /* KAF: was 6 */
+
+typedef struct skb_frag_struct {
+ struct pfn_info *page;
+ __u16 page_offset;
+ __u16 size;
+} skb_frag_t;
+
+struct skb_shared_info {
+ unsigned int nr_frags;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct sk_buff {
+ /* These two members must be first. */
+ struct sk_buff * next; /* Next buffer in list */
+ struct sk_buff * prev; /* Previous buffer in list */
+
+ struct sk_buff_head * list; /* List we are on */
+ struct net_device *dev; /* Device we arrived on/are leaving by */
+
+ /* Transport layer header */
+ union
+ {
+ struct tcphdr *th;
+ struct udphdr *uh;
+ struct icmphdr *icmph;
+ struct igmphdr *igmph;
+ struct iphdr *ipiph;
+ struct spxhdr *spxh;
+ unsigned char *raw;
+ } h;
+
+ /* Network layer header */
+ union
+ {
+ struct iphdr *iph;
+ struct ipv6hdr *ipv6h;
+ struct arphdr *arph;
+ struct ipxhdr *ipxh;
+ unsigned char *raw;
+ } nh;
+
+ /* Link layer header */
+ union
+ {
+ struct ethhdr *ethernet;
+ unsigned char *raw;
+ } mac;
+
+ unsigned int len; /* Length of actual data */
+ unsigned int data_len;
+ unsigned int csum; /* Checksum */
+ unsigned char skb_type,
+ pkt_type, /* Packet class */
+ ip_summed; /* Driver fed us an IP checksum */
+ unsigned short protocol; /* Packet protocol from driver. */
+ unsigned char *head; /* Head of buffer */
+ unsigned char *data; /* Data head pointer */
+ unsigned char *tail; /* Tail pointer */
+ unsigned char *end; /* End pointer */
+
+ void (*destructor)(struct sk_buff *); /* Destruct function */
+ struct pfn_info *pf; /* record of physical pf address for freeing */
+ int src_vif; /* vif we came from */
+ int dst_vif; /* vif we are bound for */
+ struct skb_shared_info shinfo; /* shared info is no longer shared in Xen. */
+};
+
+extern void __kfree_skb(struct sk_buff *skb);
+extern struct sk_buff *alloc_skb(unsigned int size, int priority);
+extern struct sk_buff *alloc_skb_nodata(int priority);
+extern struct sk_buff *alloc_zc_skb(unsigned int size, int priority);
+extern void kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
+#define dev_kfree_skb(a) kfree_skb(a)
+extern void skb_over_panic(struct sk_buff *skb, int len, void *here);
+extern void skb_under_panic(struct sk_buff *skb, int len, void *here);
+
+/* In Xen, we don't clone skbs, so shared data can go in the sk_buff struct. */
+#define skb_shinfo(SKB) ((struct skb_shared_info *)(&(SKB)->shinfo))
+
+/**
+ * kfree_skb - free an sk_buff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+
+static inline void kfree_skb(struct sk_buff *skb)
+{
+ __kfree_skb(skb);
+}
+
+/**
+ * skb_queue_len - get queue length
+ * @list_: list to measure
+ *
+ * Return the length of an &sk_buff queue.
+ */
+
+static inline __u32 skb_queue_len(struct sk_buff_head *list_)
+{
+ return(list_->qlen);
+}
+
+static inline void skb_queue_head_init(struct sk_buff_head *list)
+{
+ spin_lock_init(&list->lock);
+ list->prev = (struct sk_buff *)list;
+ list->next = (struct sk_buff *)list;
+ list->qlen = 0;
+}
+
+/**
+ * __skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ prev = (struct sk_buff *)list;
+ next = prev->next;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = newsk;
+ prev->next = newsk;
+}
+
+/**
+ * __skb_dequeue - remove from the head of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the head of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The head item is
+ * returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
+{
+ struct sk_buff *next, *prev, *result;
+
+ prev = (struct sk_buff *) list;
+ next = prev->next;
+ result = NULL;
+ if (next != prev) {
+ result = next;
+ next = next->next;
+ list->qlen--;
+ next->prev = prev;
+ prev->next = next;
+ result->next = NULL;
+ result->prev = NULL;
+ result->list = NULL;
+ }
+ return result;
+}
+
+static inline int skb_is_nonlinear(const struct sk_buff *skb)
+{
+ return skb->data_len;
+}
+
+#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0)
+
+/*
+ * Add data to an sk_buff
+ */
+
+static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp=skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail+=len;
+ skb->len+=len;
+ return tmp;
+}
+
+/**
+ * skb_put - add data to a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer. If this would
+ * exceed the total buffer size the kernel will panic. A pointer to the
+ * first byte of the extra data is returned.
+ */
+
+static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp=skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail+=len;
+ skb->len+=len;
+ if(skb->tail>skb->end) {
+ skb_over_panic(skb, len, current_text_addr());
+ }
+ return tmp;
+}
+
+static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data-=len;
+ skb->len+=len;
+ return skb->data;
+}
+
+/**
+ * skb_push - add data to the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer at the buffer
+ * start. If this would exceed the total buffer headroom the kernel will
+ * panic. A pointer to the first byte of the extra data is returned.
+ */
+
+static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data-=len;
+ skb->len+=len;
+ if(skb->data<skb->head) {
+ skb_under_panic(skb, len, current_text_addr());
+ }
+ return skb->data;
+}
+
+static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ skb->len-=len;
+ if (skb->len < skb->data_len)
+ BUG();
+ return skb->data+=len;
+}
+
+/**
+ * skb_pull - remove data from the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to remove
+ *
+ * This function removes data from the start of a buffer, returning
+ * the memory to the headroom. A pointer to the next data in the buffer
+ * is returned. Once the data has been pulled future pushes will overwrite
+ * the old data.
+ */
+
+static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len > skb->len)
+ return NULL;
+ return __skb_pull(skb,len);
+}
+
+/**
+ * skb_reserve - adjust headroom
+ * @skb: buffer to alter
+ * @len: bytes to move
+ *
+ * Increase the headroom of an empty &sk_buff by reducing the tail
+ * room. This is only allowed for an empty buffer.
+ */
+
+static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
+{
+ skb->data+=len;
+ skb->tail+=len;
+}
+
+/**
+ * __dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+
+static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
+ int gfp_mask)
+{
+ struct sk_buff *skb;
+ skb = alloc_zc_skb(length+16, gfp_mask);
+ if (skb)
+ skb_reserve(skb,16);
+ return skb;
+}
+
+/**
+ * dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory. Although this function
+ * allocates memory, it can be called from an interrupt.
+ */
+
+static inline struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+ return __dev_alloc_skb(length, GFP_ATOMIC);
+}
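+
+/*
+ * Illustrative sketch (sizes hypothetical): reserve headroom first, then
+ * append payload with skb_put(), then prepend headers with skb_push():
+ *
+ *   struct sk_buff *skb = dev_alloc_skb(1514);
+ *   if ( skb != NULL )
+ *   {
+ *       unsigned char *p = skb_put(skb, 1000);    1000-byte payload
+ *       memset(p, 0, 1000);
+ *       skb_push(skb, 14);     link-layer header fits in the headroom
+ *   }
+ */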
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+ return page_address(frag->page);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+}
+
+extern int skb_copy_bits(const struct sk_buff *skb,
+ int offset, void *to, int len);
+extern void skb_init(void);
+
+#endif /* _LINUX_SKBUFF_H */
diff --git a/xen/include/xeno/slab.h b/xen/include/xeno/slab.h
new file mode 100644
index 0000000000..21a53051f1
--- /dev/null
+++ b/xen/include/xeno/slab.h
@@ -0,0 +1,84 @@
+/*
+ * linux/mm/slab.h
+ * Written by Mark Hemment, 1996.
+ * (markhe@nextd.demon.co.uk)
+ */
+
+#if !defined(_LINUX_SLAB_H)
+#define _LINUX_SLAB_H
+
+typedef struct kmem_cache_s kmem_cache_t;
+
+#include <xeno/mm.h>
+#include <xeno/cache.h>
+
+/* flags for kmem_cache_alloc() */
+#define SLAB_NOFS GFP_NOFS
+#define SLAB_NOIO GFP_NOIO
+#define SLAB_NOHIGHIO GFP_NOHIGHIO
+#define SLAB_ATOMIC GFP_ATOMIC
+#define SLAB_USER GFP_USER
+#define SLAB_KERNEL GFP_KERNEL
+#define SLAB_NFS GFP_NFS
+#define SLAB_DMA GFP_DMA
+
+#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS)
+#define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */
+
+/* flags to pass to kmem_cache_create().
+ * The first 3 are only valid when the allocator has been built with
+ * SLAB_DEBUG_SUPPORT.
+ */
+#define SLAB_DEBUG_FREE 0x00000100UL /* Perform (expensive) checks on free */
+#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */
+#define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */
+#define SLAB_POISON 0x00000800UL /* Poison objects */
+#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */
+#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on h/w cache lines */
+#define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */
+
+/* flags passed to a constructor func */
+#define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then destructor */
+#define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */
+#define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */
+
+/* prototypes */
+extern void kmem_cache_init(void);
+extern void kmem_cache_sizes_init(unsigned long);
+
+extern kmem_cache_t *kmem_find_general_cachep(size_t, int gfpflags);
+extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long,
+ void (*)(void *, kmem_cache_t *, unsigned long),
+ void (*)(void *, kmem_cache_t *, unsigned long));
+extern int kmem_cache_destroy(kmem_cache_t *);
+extern int kmem_cache_shrink(kmem_cache_t *);
+extern void *kmem_cache_alloc(kmem_cache_t *, int);
+extern void kmem_cache_free(kmem_cache_t *, void *);
+
+extern void *kmalloc(size_t, int);
+extern void kfree(const void *);
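+
+/*
+ * Illustrative sketch (names hypothetical): a fixed-size object cache is
+ * created once at init time and then serves fast allocations:
+ *
+ *   kmem_cache_t *foo_cachep =
+ *       kmem_cache_create("foo_cache", sizeof(struct foo), 0,
+ *                         SLAB_HWCACHE_ALIGN, NULL, NULL);
+ *
+ *   struct foo *f = kmem_cache_alloc(foo_cachep, SLAB_KERNEL);
+ *   ...
+ *   kmem_cache_free(foo_cachep, f);
+ */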
+
+extern int FASTCALL(kmem_cache_reap(int));
+
+extern void dump_slabinfo();
+
+#if 0
+extern int slabinfo_read_proc(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+extern int slabinfo_write_proc(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+#endif
+
+/* System wide caches */
+extern kmem_cache_t *vm_area_cachep;
+extern kmem_cache_t *mm_cachep;
+extern kmem_cache_t *names_cachep;
+extern kmem_cache_t *files_cachep;
+extern kmem_cache_t *filp_cachep;
+extern kmem_cache_t *dquot_cachep;
+extern kmem_cache_t *bh_cachep;
+extern kmem_cache_t *fs_cachep;
+extern kmem_cache_t *sigact_cachep;
+
+
+#endif /* _LINUX_SLAB_H */
diff --git a/xen/include/xeno/smp.h b/xen/include/xeno/smp.h
new file mode 100644
index 0000000000..786026649e
--- /dev/null
+++ b/xen/include/xeno/smp.h
@@ -0,0 +1,88 @@
+#ifndef __LINUX_SMP_H
+#define __LINUX_SMP_H
+
+/*
+ * Generic SMP support
+ * Alan Cox. <alan@redhat.com>
+ */
+
+#include <xeno/config.h>
+
+#ifdef CONFIG_SMP
+
+#include <asm/smp.h>
+
+/*
+ * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc.
+ * (defined in asm header):
+ */
+
+/*
+ * stops all CPUs but the current one:
+ */
+extern void smp_send_stop(void);
+
+extern void FASTCALL(smp_send_event_check_mask(unsigned long cpu_mask));
+#define smp_send_event_check_cpu(_cpu) smp_send_event_check_mask(1<<(_cpu))
+
+
+/*
+ * Boot processor call to load the other CPUs
+ */
+extern void smp_boot_cpus(void);
+
+/*
+ * Processor call in. Must hold processors until ..
+ */
+extern void smp_callin(void);
+
+/*
+ * Multiprocessors may now schedule
+ */
+extern void smp_commence(void);
+
+/*
+ * Call a function on all other processors
+ */
+extern int smp_call_function (void (*func) (void *info), void *info,
+ int retry, int wait);
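+
+/*
+ * Illustrative sketch (handler name hypothetical): run a callback on
+ * every other CPU and spin until they have all finished; note the
+ * calling CPU is not included, so it runs the handler itself:
+ *
+ *   static void drain_local_state(void *unused) { ... }
+ *
+ *   smp_call_function(drain_local_state, NULL, 1, 1);   retry=1, wait=1
+ *   drain_local_state(NULL);
+ */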
+
+/*
+ * True once the per-process idle is forked
+ */
+extern int smp_threads_ready;
+
+extern int smp_num_cpus;
+
+extern volatile unsigned long smp_msg_data;
+extern volatile int smp_src_cpu;
+extern volatile int smp_msg_id;
+
+#define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPUs */
+#define MSG_ALL 0x8001
+
+#define MSG_INVALIDATE_TLB 0x0001 /* Remote processor TLB invalidate */
+#define MSG_STOP_CPU 0x0002 /* Sent to shut down slave CPUs
+ * when rebooting
+ */
+#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU */
+#define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */
+
+#else
+
+/*
+ * These macros fold the SMP functionality into a single CPU system
+ */
+
+#define smp_num_cpus 1
+#define smp_processor_id() 0
+#define hard_smp_processor_id() 0
+#define smp_threads_ready 1
+#define kernel_lock()
+#define cpu_logical_map(cpu) 0
+#define cpu_number_map(cpu) 0
+#define smp_call_function(func,info,retry,wait) ({ 0; })
+#define cpu_online_map 1
+
+#endif
+#endif
diff --git a/xen/include/xeno/socket.h b/xen/include/xeno/socket.h
new file mode 100644
index 0000000000..92e7b93ae5
--- /dev/null
+++ b/xen/include/xeno/socket.h
@@ -0,0 +1,136 @@
+#ifndef _LINUX_SOCKET_H
+#define _LINUX_SOCKET_H
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+//#include <asm/socket.h> /* arch-dependent defines */
+#include <linux/sockios.h> /* the SIOCxxx I/O controls */
+//#include <linux/uio.h> /* iovec support */
+#include <linux/types.h> /* pid_t */
+
+typedef unsigned short sa_family_t;
+
+/*
+ * 1003.1g requires sa_family_t and that sa_data is char.
+ */
+
+struct sockaddr {
+ sa_family_t sa_family; /* address family, AF_xxx */
+ char sa_data[14]; /* 14 bytes of protocol address */
+};
+
+/* Supported address families. */
+#define AF_UNSPEC 0
+#define AF_UNIX 1 /* Unix domain sockets */
+#define AF_LOCAL 1 /* POSIX name for AF_UNIX */
+#define AF_INET 2 /* Internet IP Protocol */
+#define AF_AX25 3 /* Amateur Radio AX.25 */
+#define AF_IPX 4 /* Novell IPX */
+#define AF_APPLETALK 5 /* AppleTalk DDP */
+#define AF_NETROM 6 /* Amateur Radio NET/ROM */
+#define AF_BRIDGE 7 /* Multiprotocol bridge */
+#define AF_ATMPVC 8 /* ATM PVCs */
+#define AF_X25 9 /* Reserved for X.25 project */
+#define AF_INET6 10 /* IP version 6 */
+#define AF_ROSE 11 /* Amateur Radio X.25 PLP */
+#define AF_DECnet 12 /* Reserved for DECnet project */
+#define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/
+#define AF_SECURITY 14 /* Security callback pseudo AF */
+#define AF_KEY 15 /* PF_KEY key management API */
+#define AF_NETLINK 16
+#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */
+#define AF_PACKET 17 /* Packet family */
+#define AF_ASH 18 /* Ash */
+#define AF_ECONET 19 /* Acorn Econet */
+#define AF_ATMSVC 20 /* ATM SVCs */
+#define AF_SNA 22 /* Linux SNA Project (nutters!) */
+#define AF_IRDA 23 /* IRDA sockets */
+#define AF_PPPOX 24 /* PPPoX sockets */
+#define AF_WANPIPE 25 /* Wanpipe API Sockets */
+#define AF_BLUETOOTH 31 /* Bluetooth sockets */
+#define AF_MAX 32 /* For now.. */
+
+/* Protocol families, same as address families. */
+#define PF_UNSPEC AF_UNSPEC
+#define PF_UNIX AF_UNIX
+#define PF_LOCAL AF_LOCAL
+#define PF_INET AF_INET
+#define PF_AX25 AF_AX25
+#define PF_IPX AF_IPX
+#define PF_APPLETALK AF_APPLETALK
+#define PF_NETROM AF_NETROM
+#define PF_BRIDGE AF_BRIDGE
+#define PF_ATMPVC AF_ATMPVC
+#define PF_X25 AF_X25
+#define PF_INET6 AF_INET6
+#define PF_ROSE AF_ROSE
+#define PF_DECnet AF_DECnet
+#define PF_NETBEUI AF_NETBEUI
+#define PF_SECURITY AF_SECURITY
+#define PF_KEY AF_KEY
+#define PF_NETLINK AF_NETLINK
+#define PF_ROUTE AF_ROUTE
+#define PF_PACKET AF_PACKET
+#define PF_ASH AF_ASH
+#define PF_ECONET AF_ECONET
+#define PF_ATMSVC AF_ATMSVC
+#define PF_SNA AF_SNA
+#define PF_IRDA AF_IRDA
+#define PF_PPPOX AF_PPPOX
+#define PF_WANPIPE AF_WANPIPE
+#define PF_BLUETOOTH AF_BLUETOOTH
+#define PF_MAX AF_MAX
+
+/* Maximum queue length specifiable by listen. */
+#define SOMAXCONN 128
+
+/* Flags we can use with send and recv.
+   Added those for 1003.1g; not all are supported yet
+ */
+
+#define MSG_OOB 1
+#define MSG_PEEK 2
+#define MSG_DONTROUTE 4
+#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */
+#define MSG_CTRUNC 8
+#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */
+#define MSG_TRUNC 0x20
+#define MSG_DONTWAIT 0x40 /* Nonblocking io */
+#define MSG_EOR 0x80 /* End of record */
+#define MSG_WAITALL 0x100 /* Wait for a full request */
+#define MSG_FIN 0x200
+#define MSG_SYN 0x400
+#define MSG_CONFIRM 0x800 /* Confirm path validity */
+#define MSG_RST 0x1000
+#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */
+#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */
+#define MSG_MORE 0x8000 /* Sender will send more */
+
+#define MSG_EOF MSG_FIN
+
+
+/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
+#define SOL_IP 0
+/* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */
+#define SOL_TCP 6
+#define SOL_UDP 17
+#define SOL_IPV6 41
+#define SOL_ICMPV6 58
+#define SOL_RAW 255
+#define SOL_IPX 256
+#define SOL_AX25 257
+#define SOL_ATALK 258
+#define SOL_NETROM 259
+#define SOL_ROSE 260
+#define SOL_DECNET 261
+#define SOL_X25 262
+#define SOL_PACKET 263
+#define SOL_ATM 264 /* ATM layer (cell level) */
+#define SOL_AAL 265 /* ATM Adaption Layer (packet level) */
+#define SOL_IRDA 266
+
+/* IPX options */
+#define IPX_TYPE 1
+
+#endif /* not kernel and not glibc */
+#endif /* _LINUX_SOCKET_H */
diff --git a/xen/include/xeno/sockios.h b/xen/include/xeno/sockios.h
new file mode 100644
index 0000000000..a0ad8100bf
--- /dev/null
+++ b/xen/include/xeno/sockios.h
@@ -0,0 +1,132 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions of the socket-level I/O control calls.
+ *
+ * Version: @(#)sockios.h 1.0.2 03/09/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_SOCKIOS_H
+#define _LINUX_SOCKIOS_H
+
+//#include <asm/sockios.h>
+
+/* Linux-specific socket ioctls */
+#define SIOCINQ FIONREAD
+#define SIOCOUTQ TIOCOUTQ
+
+/* Routing table calls. */
+#define SIOCADDRT 0x890B /* add routing table entry */
+#define SIOCDELRT 0x890C /* delete routing table entry */
+#define SIOCRTMSG 0x890D /* call to routing system */
+
+/* Socket configuration controls. */
+#define SIOCGIFNAME 0x8910 /* get iface name */
+#define SIOCSIFLINK 0x8911 /* set iface channel */
+#define SIOCGIFCONF 0x8912 /* get iface list */
+#define SIOCGIFFLAGS 0x8913 /* get flags */
+#define SIOCSIFFLAGS 0x8914 /* set flags */
+#define SIOCGIFADDR 0x8915 /* get PA address */
+#define SIOCSIFADDR 0x8916 /* set PA address */
+#define SIOCGIFDSTADDR 0x8917 /* get remote PA address */
+#define SIOCSIFDSTADDR 0x8918 /* set remote PA address */
+#define SIOCGIFBRDADDR 0x8919 /* get broadcast PA address */
+#define SIOCSIFBRDADDR 0x891a /* set broadcast PA address */
+#define SIOCGIFNETMASK 0x891b /* get network PA mask */
+#define SIOCSIFNETMASK 0x891c /* set network PA mask */
+#define SIOCGIFMETRIC 0x891d /* get metric */
+#define SIOCSIFMETRIC 0x891e /* set metric */
+#define SIOCGIFMEM 0x891f /* get memory address (BSD) */
+#define SIOCSIFMEM 0x8920 /* set memory address (BSD) */
+#define SIOCGIFMTU 0x8921 /* get MTU size */
+#define SIOCSIFMTU 0x8922 /* set MTU size */
+#define SIOCSIFNAME 0x8923 /* set interface name */
+#define SIOCSIFHWADDR 0x8924 /* set hardware address */
+#define SIOCGIFENCAP 0x8925 /* get/set encapsulations */
+#define SIOCSIFENCAP 0x8926
+#define SIOCGIFHWADDR 0x8927 /* Get hardware address */
+#define SIOCGIFSLAVE 0x8929 /* Driver slaving support */
+#define SIOCSIFSLAVE 0x8930
+#define SIOCADDMULTI 0x8931 /* Multicast address lists */
+#define SIOCDELMULTI 0x8932
+#define SIOCGIFINDEX 0x8933 /* name -> if_index mapping */
+#define SIOGIFINDEX SIOCGIFINDEX /* misprint compatibility :-) */
+#define SIOCSIFPFLAGS 0x8934 /* set/get extended flags set */
+#define SIOCGIFPFLAGS 0x8935
+#define SIOCDIFADDR 0x8936 /* delete PA address */
+#define SIOCSIFHWBROADCAST 0x8937 /* set hardware broadcast addr */
+#define SIOCGIFCOUNT 0x8938 /* get number of devices */
+
+#define SIOCGIFBR 0x8940 /* Bridging support */
+#define SIOCSIFBR 0x8941 /* Set bridging options */
+
+#define SIOCGIFDIVERT 0x8944 /* Frame diversion support */
+#define SIOCSIFDIVERT 0x8945 /* Set frame diversion options */
+
+#define SIOCETHTOOL 0x8946 /* Ethtool interface */
+
+#define SIOCGMIIPHY 0x8947 /* Get address of MII PHY in use. */
+#define SIOCGMIIREG 0x8948 /* Read MII PHY register. */
+#define SIOCSMIIREG 0x8949 /* Write MII PHY register. */
+
+/* ARP cache control calls. */
+ /* 0x8950 - 0x8952: obsolete calls, don't re-use */
+#define SIOCDARP 0x8953 /* delete ARP table entry */
+#define SIOCGARP 0x8954 /* get ARP table entry */
+#define SIOCSARP 0x8955 /* set ARP table entry */
+
+/* RARP cache control calls. */
+#define SIOCDRARP 0x8960 /* delete RARP table entry */
+#define SIOCGRARP 0x8961 /* get RARP table entry */
+#define SIOCSRARP 0x8962 /* set RARP table entry */
+
+/* Driver configuration calls */
+
+#define SIOCGIFMAP 0x8970 /* Get device parameters */
+#define SIOCSIFMAP 0x8971 /* Set device parameters */
+
+/* DLCI configuration calls */
+
+#define SIOCADDDLCI 0x8980 /* Create new DLCI device */
+#define SIOCDELDLCI 0x8981 /* Delete DLCI device */
+
+#define SIOCGIFVLAN 0x8982 /* 802.1Q VLAN support */
+#define SIOCSIFVLAN 0x8983 /* Set 802.1Q VLAN options */
+
+/* bonding calls */
+
+#define SIOCBONDENSLAVE 0x8990 /* enslave a device to the bond */
+#define SIOCBONDRELEASE 0x8991 /* release a slave from the bond*/
+#define SIOCBONDSETHWADDR 0x8992 /* set the hw addr of the bond */
+#define SIOCBONDSLAVEINFOQUERY 0x8993 /* rtn info about slave state */
+#define SIOCBONDINFOQUERY 0x8994 /* rtn info about bond state */
+#define SIOCBONDCHANGEACTIVE 0x8995 /* update to a new active slave */
+
+/* Device private ioctl calls */
+
+/*
+ * These 16 ioctls are available to devices via the do_ioctl() device
+ * vector. Each device should include this file and redefine these names
+ * as their own. Because these are device dependent it is a good idea
+ * _NOT_ to issue them to random objects and hope.
+ *
+ * THESE IOCTLS ARE _DEPRECATED_ AND WILL DISAPPEAR IN 2.5.X -DaveM
+ */
+
+#define SIOCDEVPRIVATE 0x89F0 /* to 89FF */
+
+/*
+ * These 16 ioctl calls are protocol private
+ */
+
+#define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */
+#endif /* _LINUX_SOCKIOS_H */
diff --git a/xen/include/xeno/spinlock.h b/xen/include/xeno/spinlock.h
new file mode 100644
index 0000000000..08f2eb6098
--- /dev/null
+++ b/xen/include/xeno/spinlock.h
@@ -0,0 +1,142 @@
+#ifndef __LINUX_SPINLOCK_H
+#define __LINUX_SPINLOCK_H
+
+#include <xeno/config.h>
+#include <asm/system.h>
+
+/*
+ * These are the generic versions of the spinlocks and read-write
+ * locks..
+ */
+#define spin_lock_irqsave(lock, flags) do { local_irq_save(flags); spin_lock(lock); } while (0)
+#define spin_lock_irq(lock) do { local_irq_disable(); spin_lock(lock); } while (0)
+#define spin_lock_bh(lock) do { local_bh_disable(); spin_lock(lock); } while (0)
+
+#define read_lock_irqsave(lock, flags) do { local_irq_save(flags); read_lock(lock); } while (0)
+#define read_lock_irq(lock) do { local_irq_disable(); read_lock(lock); } while (0)
+#define read_lock_bh(lock) do { local_bh_disable(); read_lock(lock); } while (0)
+
+#define write_lock_irqsave(lock, flags) do { local_irq_save(flags); write_lock(lock); } while (0)
+#define write_lock_irq(lock) do { local_irq_disable(); write_lock(lock); } while (0)
+#define write_lock_bh(lock) do { local_bh_disable(); write_lock(lock); } while (0)
+
+#define spin_unlock_irqrestore(lock, flags) do { spin_unlock(lock); local_irq_restore(flags); } while (0)
+#define spin_unlock_irq(lock) do { spin_unlock(lock); local_irq_enable(); } while (0)
+#define spin_unlock_bh(lock) do { spin_unlock(lock); local_bh_enable(); } while (0)
+
+#define read_unlock_irqrestore(lock, flags) do { read_unlock(lock); local_irq_restore(flags); } while (0)
+#define read_unlock_irq(lock) do { read_unlock(lock); local_irq_enable(); } while (0)
+#define read_unlock_bh(lock) do { read_unlock(lock); local_bh_enable(); } while (0)
+
+#define write_unlock_irqrestore(lock, flags) do { write_unlock(lock); local_irq_restore(flags); } while (0)
+#define write_unlock_irq(lock) do { write_unlock(lock); local_irq_enable(); } while (0)
+#define write_unlock_bh(lock) do { write_unlock(lock); local_bh_enable(); } while (0)
+#define spin_trylock_bh(lock) ({ int __r; local_bh_disable();\
+ __r = spin_trylock(lock); \
+ if (!__r) local_bh_enable(); \
+ __r; })
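+
+/*
+ * Illustrative sketch (lock name hypothetical): the irqsave forms are
+ * the safe default when a lock can also be taken from interrupt context;
+ * the caller's interrupt state is saved in `flags' and restored later:
+ *
+ *   static spinlock_t mylock = SPIN_LOCK_UNLOCKED;
+ *   unsigned long flags;
+ *
+ *   spin_lock_irqsave(&mylock, flags);
+ *   ... critical section runs with local interrupts disabled ...
+ *   spin_unlock_irqrestore(&mylock, flags);
+ */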
+
+#ifdef CONFIG_SMP
+#include <asm/spinlock.h>
+
+#elif !defined(spin_lock_init) /* !SMP and spin_lock_init not previously
+                                  defined (e.g. by including asm/spinlock.h) */
+
+#define DEBUG_SPINLOCKS 0 /* 0 == no debugging, 1 == maintain lock state, 2 == full debug */
+
+#if (DEBUG_SPINLOCKS < 1)
+
+#define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic)
+#define ATOMIC_DEC_AND_LOCK
+
+/*
+ * Your basic spinlocks, allowing only a single CPU anywhere
+ *
+ * Most gcc versions have a nasty bug with empty initializers.
+ */
+#if (__GNUC__ > 2)
+ typedef struct { } spinlock_t;
+ #define SPIN_LOCK_UNLOCKED (spinlock_t) { }
+#else
+ typedef struct { int gcc_is_buggy; } spinlock_t;
+ #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
+#endif
+
+#define spin_lock_init(lock) do { } while(0)
+#define spin_lock(lock) (void)(lock) /* Not "unused variable". */
+#define spin_is_locked(lock) (0)
+#define spin_trylock(lock) ({1; })
+#define spin_unlock_wait(lock) do { } while(0)
+#define spin_unlock(lock) do { } while(0)
+
+#elif (DEBUG_SPINLOCKS < 2)
+
+typedef struct {
+ volatile unsigned long lock;
+} spinlock_t;
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
+
+#define spin_lock_init(x) do { (x)->lock = 0; } while (0)
+#define spin_is_locked(lock) (test_bit(0,(lock)))
+#define spin_trylock(lock) (!test_and_set_bit(0,(lock)))
+
+#define spin_lock(x) do { (x)->lock = 1; } while (0)
+#define spin_unlock_wait(x) do { } while (0)
+#define spin_unlock(x) do { (x)->lock = 0; } while (0)
+
+#else /* (DEBUG_SPINLOCKS >= 2) */
+
+typedef struct {
+ volatile unsigned long lock;
+ volatile unsigned int babble;
+ const char *module;
+} spinlock_t;
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0, 25, __BASE_FILE__ }
+
+/*#include <linux/kernel.h>*/
+
+#define spin_lock_init(x) do { (x)->lock = 0; } while (0)
+#define spin_is_locked(lock) (test_bit(0,(lock)))
+#define spin_trylock(lock) (!test_and_set_bit(0,(lock)))
+
+#define spin_lock(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_lock(%s:%p) already locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 1; restore_flags(__spinflags);} while (0)
+#define spin_unlock_wait(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock_wait(%s:%p) deadlock\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} restore_flags(__spinflags);} while (0)
+#define spin_unlock(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if (!(x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock(%s:%p) not locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 0; restore_flags(__spinflags);} while (0)
+
+#endif /* DEBUG_SPINLOCKS */
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ *
+ * Most gcc versions have a nasty bug with empty initializers.
+ */
+#if (__GNUC__ > 2)
+ typedef struct { } rwlock_t;
+ #define RW_LOCK_UNLOCKED (rwlock_t) { }
+#else
+ typedef struct { int gcc_is_buggy; } rwlock_t;
+ #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
+#endif
+
+#define rwlock_init(lock) do { } while(0)
+#define read_lock(lock) (void)(lock) /* Not "unused variable". */
+#define read_unlock(lock) do { } while(0)
+#define write_lock(lock) (void)(lock) /* Not "unused variable". */
+#define write_unlock(lock) do { } while(0)
+
+#endif /* !SMP */
+
+/* "lock on reference count zero" */
+#ifndef ATOMIC_DEC_AND_LOCK
+#include <asm/atomic.h>
+extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
+#endif
+
+#endif /* __LINUX_SPINLOCK_H */
diff --git a/xen/include/xeno/time.h b/xen/include/xeno/time.h
new file mode 100644
index 0000000000..a017b0d2b0
--- /dev/null
+++ b/xen/include/xeno/time.h
@@ -0,0 +1,98 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: time.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: This file provides a one-stop shop for all time-related
+ * issues within the hypervisor.
+ *
+ * The Hypervisor provides the following notions of time:
+ * Cycle Counter Time, System Time, Wall Clock Time, and
+ * Domain Virtual Time.
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+
+
+#ifndef __XENO_TIME_H__
+#define __XENO_TIME_H__
+
+#include <asm/ptrace.h> /* XXX Only used for do_timer which should be moved */
+#include <asm/time.h> /* pull in architecture specific time definition */
+#include <xeno/types.h>
+
+/*
+ * Init time
+ */
+extern int init_xeno_time();
+
+/*
+ * Cycle Counter Time (defined in asm/time.h)
+ */
+
+
+extern u64 cpu_freq;
+
+/*
+ * System Time
+ * 64 bit value containing the nanoseconds elapsed since boot time.
+ * This value is adjusted by frequency drift.
+ * NOW() returns the current time.
+ * The other macros are for convenience to approximate short intervals
+ * of real time into system time
+ */
+
+s_time_t get_s_time(void);
+
+#define NOW() ((s_time_t)get_s_time())
+#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL )
+#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL )
+#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL )
+#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000UL )
+#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL )
+#define Time_Max ((s_time_t) 0x7fffffffffffffffLL)
+#define FOREVER Time_Max
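+
+/*
+ * Illustrative sketch: system time is a 64-bit nanosecond count, so
+ * deadlines compose by plain arithmetic, e.g. "50ms from now":
+ *
+ *   s_time_t deadline = NOW() + MILLISECS(50);
+ *   if ( NOW() >= deadline )
+ *       ... the interval has elapsed ...
+ */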
+
+/*
+ * Wall Clock Time
+ */
+struct timeval {
+ long tv_sec; /* seconds */
+ long tv_usec; /* microseconds */
+};
+
+struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+
+#ifdef __KERNEL__
+extern void do_gettimeofday(struct timeval *tv);
+extern void do_settimeofday(struct timeval *tv);
+extern void get_fast_time(struct timeval *tv);
+extern void (*do_get_fast_time)(struct timeval *);
+#endif
+
+/*
+ * Domain Virtual Time (defined in asm/time.h)
+ */
+/* XXX Interface for getting and setting still missing */
+
+
+/* update the per domain time information */
+extern void update_dom_time(shared_info_t *si);
+
+/* XXX move this */
+extern void do_timer(struct pt_regs *regs);
+
+#endif /* __XENO_TIME_H__ */
diff --git a/xen/include/xeno/timer.h b/xen/include/xeno/timer.h
new file mode 100644
index 0000000000..dcde75b182
--- /dev/null
+++ b/xen/include/xeno/timer.h
@@ -0,0 +1,81 @@
+#ifndef _LINUX_TIMER_H
+#define _LINUX_TIMER_H
+
+#include <linux/config.h>
+#include <linux/list.h>
+
+/*
+ * In Linux 2.4, static timers have been removed from the kernel.
+ * Timers may be dynamically created and destroyed, and should be initialized
+ * by a call to init_timer() upon creation.
+ *
+ * The "data" field enables use of a common timeout function for several
+ * timeouts. You can use this field to distinguish between the different
+ * invocations.
+ *
+ * RN: Unlike the Linux timers, which are executed at the periodic timer
+ * interrupt, in Xen, the timer list is only checked "occasionally", thus
+ * its accuracy might be somewhat worse than under Linux. However, the
+ * hypervisor should be purely event-driven and, in fact, in the current
+ *     implementation, timers are only used for watchdog purposes at a very
+ * coarse granularity anyway. Thus this is not a problem.
+ */
+struct timer_list {
+ struct list_head list;
+ unsigned long expires; /* jiffies */
+ unsigned long data;
+ void (*function)(unsigned long);
+};
+
+extern void add_timer(struct timer_list * timer);
+extern int del_timer(struct timer_list * timer);
+
+#ifdef CONFIG_SMP
+extern int del_timer_sync(struct timer_list * timer);
+extern void sync_timers(void);
+#else
+#define del_timer_sync(t) del_timer(t)
+#define sync_timers() do { } while (0)
+#endif
+
+/*
+ * mod_timer is a more efficient way to update the expires field of an
+ * active timer (if the timer is inactive it will be activated)
+ * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a).
+ * If the timer is known to be not pending (ie, in the handler), mod_timer
+ * is less efficient than a->expires = b; add_timer(a).
+ */
+int mod_timer(struct timer_list *timer, unsigned long expires);
+
+extern void it_real_fn(unsigned long);
+
+static inline void init_timer(struct timer_list * timer)
+{
+ timer->list.next = timer->list.prev = NULL;
+}
+
+static inline int timer_pending (const struct timer_list * timer)
+{
+ return timer->list.next != NULL;
+}
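+
+/*
+ * Illustrative sketch (names hypothetical; HZ assumed from asm/param.h):
+ *
+ *   static void watchdog_fn(unsigned long data) { ... }
+ *
+ *   struct timer_list wdog;
+ *   init_timer(&wdog);
+ *   wdog.function = watchdog_fn;
+ *   wdog.data     = 0;
+ *   wdog.expires  = jiffies + HZ;          roughly one second out
+ *   add_timer(&wdog);
+ *   ...
+ *   mod_timer(&wdog, jiffies + 2*HZ);      push the deadline back
+ */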
+
+/*
+ * These inlines deal with timer wrapping correctly. You are
+ * strongly encouraged to use them
+ * 1. Because people otherwise forget
+ *  2. Because if the timer wrap changes in the future you won't have to
+ * alter your driver code.
+ *
+ * time_after(a,b) returns true if the time a is after time b.
+ *
+ * Do this with "<0" and ">=0" to only test the sign of the result. A
+ * good compiler would generate better code (and a really good compiler
+ * wouldn't care). Gcc is currently neither.
+ */
+#define time_after(a,b) ((long)(b) - (long)(a) < 0)
+#define time_before(a,b) time_after(b,a)
+
+#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0)
+#define time_before_eq(a,b) time_after_eq(b,a)
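+
+/*
+ * Worked example: just after a wrap, jiffies == 0x00000005 while a stale
+ * timeout == 0xfffffff0.  (long)(timeout) - (long)(jiffies) is negative,
+ * so time_after(jiffies, timeout) is still true, as intended.
+ */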
+
+#endif
diff --git a/xen/include/xeno/timex.h b/xen/include/xeno/timex.h
new file mode 100644
index 0000000000..3a00a26e2d
--- /dev/null
+++ b/xen/include/xeno/timex.h
@@ -0,0 +1,291 @@
+/*****************************************************************************
+ * *
+ * Copyright (c) David L. Mills 1993 *
+ * *
+ * Permission to use, copy, modify, and distribute this software and its *
+ * documentation for any purpose and without fee is hereby granted, provided *
+ * that the above copyright notice appears in all copies and that both the *
+ * copyright notice and this permission notice appear in supporting *
+ * documentation, and that the name University of Delaware not be used in *
+ * advertising or publicity pertaining to distribution of the software *
+ * without specific, written prior permission. The University of Delaware *
+ * makes no representations about the suitability this software for any *
+ * purpose. It is provided "as is" without express or implied warranty. *
+ * *
+ *****************************************************************************/
+
+/*
+ * Modification history timex.h
+ *
+ * 29 Dec 97 Russell King
+ * Moved CLOCK_TICK_RATE, CLOCK_TICK_FACTOR and FINETUNE to asm/timex.h
+ * for ARM machines
+ *
+ * 9 Jan 97 Adrian Sun
+ * Shifted LATCH define to allow access to alpha machines.
+ *
+ * 26 Sep 94 David L. Mills
+ * Added defines for hybrid phase/frequency-lock loop.
+ *
+ * 19 Mar 94 David L. Mills
+ * Moved defines from kernel routines to header file and added new
+ * defines for PPS phase-lock loop.
+ *
+ * 20 Feb 94 David L. Mills
+ * Revised status codes and structures for external clock and PPS
+ * signal discipline.
+ *
+ * 28 Nov 93 David L. Mills
+ * Adjusted parameters to improve stability and increase poll
+ * interval.
+ *
+ * 17 Sep 93 David L. Mills
+ * Created file $NTP/include/sys/timex.h
+ * 07 Oct 93 Torsten Duwe
+ * Derived linux/timex.h
+ * 1995-08-13 Torsten Duwe
+ * kernel PLL updated to 1994-12-13 specs (rfc-1589)
+ * 1997-08-30 Ulrich Windl
+ * Added new constant NTP_PHASE_LIMIT
+ */
+#ifndef _LINUX_TIMEX_H
+#define _LINUX_TIMEX_H
+
+#include <asm/param.h>
+
+/*
+ * The following defines establish the engineering parameters of the PLL
+ * model. The HZ variable establishes the timer interrupt frequency, 100 Hz
+ * for the SunOS kernel, 256 Hz for the Ultrix kernel and 1024 Hz for the
+ * OSF/1 kernel. The SHIFT_HZ define expresses the same value as the
+ * nearest power of two in order to avoid hardware multiply operations.
+ */
+#if HZ >= 12 && HZ < 24
+# define SHIFT_HZ 4
+#elif HZ >= 24 && HZ < 48
+# define SHIFT_HZ 5
+#elif HZ >= 48 && HZ < 96
+# define SHIFT_HZ 6
+#elif HZ >= 96 && HZ < 192
+# define SHIFT_HZ 7
+#elif HZ >= 192 && HZ < 384
+# define SHIFT_HZ 8
+#elif HZ >= 384 && HZ < 768
+# define SHIFT_HZ 9
+#elif HZ >= 768 && HZ < 1536
+# define SHIFT_HZ 10
+#else
+# error You lose.
+#endif
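+
+/*
+ * E.g. with HZ == 100 the ladder above selects SHIFT_HZ == 7: 128 is the
+ * power of two nearest the tick rate.
+ */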
+
+/*
+ * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen
+ * for a slightly underdamped convergence characteristic. SHIFT_KH
+ * establishes the damping of the FLL and is chosen by wisdom and black
+ * art.
+ *
+ * MAXTC establishes the maximum time constant of the PLL. With the
+ * SHIFT_KG and SHIFT_KF values given and a time constant range from
+ * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours,
+ * respectively.
+ */
+#define SHIFT_KG 6 /* phase factor (shift) */
+#define SHIFT_KF 16 /* PLL frequency factor (shift) */
+#define SHIFT_KH 2 /* FLL frequency factor (shift) */
+#define MAXTC 6 /* maximum time constant (shift) */
+
+/*
+ * The SHIFT_SCALE define establishes the decimal point of the time_phase
+ * variable which serves as an extension to the low-order bits of the
+ * system clock variable. The SHIFT_UPDATE define establishes the decimal
+ * point of the time_offset variable which represents the current offset
+ * with respect to standard time. The FINEUSEC define represents 1 usec in
+ * scaled units.
+ *
+ * SHIFT_USEC defines the scaling (shift) of the time_freq and
+ * time_tolerance variables, which represent the current frequency
+ * offset and maximum frequency tolerance.
+ *
+ * FINEUSEC is 1 us in SHIFT_UPDATE units of the time_phase variable.
+ */
+#define SHIFT_SCALE 22 /* phase scale (shift) */
+#define SHIFT_UPDATE (SHIFT_KG + MAXTC) /* time offset scale (shift) */
+#define SHIFT_USEC 16 /* frequency offset scale (shift) */
+#define FINEUSEC (1L << SHIFT_SCALE) /* 1 us in phase units */
+
+#define MAXPHASE 512000L /* max phase error (us) */
+#define MAXFREQ (512L << SHIFT_USEC) /* max frequency error (ppm) */
+#define MAXTIME (200L << PPS_AVG) /* max PPS error (jitter) (200 us) */
+#define MINSEC 16L /* min interval between updates (s) */
+#define MAXSEC 1200L /* max interval between updates (s) */
+#define NTP_PHASE_LIMIT (MAXPHASE << 5) /* beyond max. dispersion */
+
+/*
+ * The following defines are used only if a pulse-per-second (PPS)
+ * signal is available and connected via a modem control lead, such as
+ * produced by the optional ppsclock feature incorporated in the Sun
+ * asynch driver. They establish the design parameters of the frequency-
+ * lock loop used to discipline the CPU clock oscillator to the PPS
+ * signal.
+ *
+ * PPS_AVG is the averaging factor for the frequency loop, as well as
+ * the time and frequency dispersion.
+ *
+ * PPS_SHIFT and PPS_SHIFTMAX specify the minimum and maximum
+ * calibration intervals, respectively, in seconds as a power of two.
+ *
+ * PPS_VALID is the maximum interval before the PPS signal is considered
+ * invalid and protocol updates used directly instead.
+ *
+ * MAXGLITCH is the maximum interval before a time offset of more than
+ * MAXTIME is believed.
+ */
+#define PPS_AVG 2 /* pps averaging constant (shift) */
+#define PPS_SHIFT 2 /* min interval duration (s) (shift) */
+#define PPS_SHIFTMAX 8 /* max interval duration (s) (shift) */
+#define PPS_VALID 120 /* pps signal watchdog max (s) */
+#define MAXGLITCH 30 /* pps signal glitch max (s) */
+
+/*
+ * Pick up the architecture specific timex specifications
+ */
+#include <asm/timex.h>
+
+/* LATCH is used in the interval timer and ftape setup. */
+#define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */
+
+/*
+ * syscall interface - used (mainly by NTP daemon)
+ * to discipline kernel clock oscillator
+ */
+struct timex {
+ unsigned int modes; /* mode selector */
+ long offset; /* time offset (usec) */
+ long freq; /* frequency offset (scaled ppm) */
+ long maxerror; /* maximum error (usec) */
+ long esterror; /* estimated error (usec) */
+ int status; /* clock command/status */
+ long constant; /* pll time constant */
+ long precision; /* clock precision (usec) (read only) */
+ long tolerance; /* clock frequency tolerance (ppm)
+ * (read only)
+ */
+ struct timeval time; /* (read only) */
+ long tick; /* (modified) usecs between clock ticks */
+
+ long ppsfreq; /* pps frequency (scaled ppm) (ro) */
+ long jitter; /* pps jitter (us) (ro) */
+ int shift; /* interval duration (s) (shift) (ro) */
+ long stabil; /* pps stability (scaled ppm) (ro) */
+ long jitcnt; /* jitter limit exceeded (ro) */
+ long calcnt; /* calibration intervals (ro) */
+ long errcnt; /* calibration errors (ro) */
+ long stbcnt; /* stability limit exceeded (ro) */
+
+ int :32; int :32; int :32; int :32;
+ int :32; int :32; int :32; int :32;
+ int :32; int :32; int :32; int :32;
+};
+
+/*
+ * Mode codes (timex.mode)
+ */
+#define ADJ_OFFSET 0x0001 /* time offset */
+#define ADJ_FREQUENCY 0x0002 /* frequency offset */
+#define ADJ_MAXERROR 0x0004 /* maximum time error */
+#define ADJ_ESTERROR 0x0008 /* estimated time error */
+#define ADJ_STATUS 0x0010 /* clock status */
+#define ADJ_TIMECONST 0x0020 /* pll time constant */
+#define ADJ_TICK 0x4000 /* tick value */
+#define ADJ_OFFSET_SINGLESHOT 0x8001 /* old-fashioned adjtime */
+
+/* xntp 3.4 compatibility names */
+#define MOD_OFFSET ADJ_OFFSET
+#define MOD_FREQUENCY ADJ_FREQUENCY
+#define MOD_MAXERROR ADJ_MAXERROR
+#define MOD_ESTERROR ADJ_ESTERROR
+#define MOD_STATUS ADJ_STATUS
+#define MOD_TIMECONST ADJ_TIMECONST
+#define MOD_CLKB ADJ_TICK
+#define MOD_CLKA ADJ_OFFSET_SINGLESHOT /* 0x8000 in original */
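+
+/*
+ * Illustrative sketch, not from the patch itself: an NTP daemon would
+ * typically drive this interface through adjtimex(), e.g.
+ *
+ *     struct timex tx;
+ *     memset(&tx, 0, sizeof(tx));
+ *     tx.modes  = ADJ_OFFSET | ADJ_STATUS;
+ *     tx.offset = -2500;            phase error of -2.5 ms, in usec
+ *     tx.status = STA_PLL;          enable kernel PLL updates
+ *     adjtimex(&tx);                returns one of the TIME_* states below
+ */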
+
+
+/*
+ * Status codes (timex.status)
+ */
+#define STA_PLL 0x0001 /* enable PLL updates (rw) */
+#define STA_PPSFREQ 0x0002 /* enable PPS freq discipline (rw) */
+#define STA_PPSTIME 0x0004 /* enable PPS time discipline (rw) */
+#define STA_FLL 0x0008 /* select frequency-lock mode (rw) */
+
+#define STA_INS 0x0010 /* insert leap (rw) */
+#define STA_DEL 0x0020 /* delete leap (rw) */
+#define STA_UNSYNC 0x0040 /* clock unsynchronized (rw) */
+#define STA_FREQHOLD 0x0080 /* hold frequency (rw) */
+
+#define STA_PPSSIGNAL 0x0100 /* PPS signal present (ro) */
+#define STA_PPSJITTER 0x0200 /* PPS signal jitter exceeded (ro) */
+#define STA_PPSWANDER 0x0400 /* PPS signal wander exceeded (ro) */
+#define STA_PPSERROR 0x0800 /* PPS signal calibration error (ro) */
+
+#define STA_CLOCKERR 0x1000 /* clock hardware fault (ro) */
+
+#define STA_RONLY (STA_PPSSIGNAL | STA_PPSJITTER | STA_PPSWANDER | \
+ STA_PPSERROR | STA_CLOCKERR) /* read-only bits */
+
+/*
+ * Clock states (time_state)
+ */
+#define TIME_OK 0 /* clock synchronized, no leap second */
+#define TIME_INS 1 /* insert leap second */
+#define TIME_DEL 2 /* delete leap second */
+#define TIME_OOP 3 /* leap second in progress */
+#define TIME_WAIT 4 /* leap second has occurred */
+#define TIME_ERROR 5 /* clock not synchronized */
+#define TIME_BAD TIME_ERROR /* bw compat */
+
+#ifdef __KERNEL__
+/*
+ * kernel variables
+ * Note: maximum error = NTP synch distance = dispersion + delay / 2;
+ * estimated error = NTP dispersion.
+ */
+extern long tick; /* timer interrupt period */
+extern int tickadj; /* amount of adjustment per tick */
+
+/*
+ * phase-lock loop variables
+ */
+extern int time_state; /* clock status */
+extern int time_status; /* clock synchronization status bits */
+extern long time_offset; /* time adjustment (us) */
+extern long time_constant; /* pll time constant */
+extern long time_tolerance; /* frequency tolerance (ppm) */
+extern long time_precision; /* clock precision (us) */
+extern long time_maxerror; /* maximum error */
+extern long time_esterror; /* estimated error */
+
+extern long time_phase; /* phase offset (scaled us) */
+extern long time_freq; /* frequency offset (scaled ppm) */
+extern long time_adj; /* tick adjust (scaled 1 / HZ) */
+extern long time_reftime; /* time at last adjustment (s) */
+
+extern long time_adjust; /* The amount of adjtime left */
+
+/* interface variables pps->timer interrupt */
+extern long pps_offset; /* pps time offset (us) */
+extern long pps_jitter; /* time dispersion (jitter) (us) */
+extern long pps_freq; /* frequency offset (scaled ppm) */
+extern long pps_stabil; /* frequency dispersion (scaled ppm) */
+extern long pps_valid; /* pps signal watchdog counter */
+
+/* interface variables pps->adjtimex */
+extern int pps_shift; /* interval duration (s) (shift) */
+extern long pps_jitcnt; /* jitter limit exceeded */
+extern long pps_calcnt; /* calibration intervals */
+extern long pps_errcnt; /* calibration errors */
+extern long pps_stbcnt; /* stability limit exceeded */
+
+#endif /* __KERNEL__ */
+
+#endif /* LINUX_TIMEX_H */
diff --git a/xen/include/xeno/tqueue.h b/xen/include/xeno/tqueue.h
new file mode 100644
index 0000000000..4a730f0ad9
--- /dev/null
+++ b/xen/include/xeno/tqueue.h
@@ -0,0 +1,125 @@
+/*
+ * tqueue.h --- task queue handling for Linux.
+ *
+ * Mostly based on a proposed bottom-half replacement code written by
+ * Kai Petzke, wpp@marie.physik.tu-berlin.de.
+ *
+ * Modified for use in the Linux kernel by Theodore Ts'o,
+ * tytso@mit.edu. Any bugs are my fault, not Kai's.
+ *
+ * The original comment follows below.
+ */
+
+#ifndef _LINUX_TQUEUE_H
+#define _LINUX_TQUEUE_H
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <asm/bitops.h>
+#include <asm/system.h>
+
+/*
+ * New proposed "bottom half" handlers:
+ * (C) 1994 Kai Petzke, wpp@marie.physik.tu-berlin.de
+ *
+ * Advantages:
+ * Advantages:
+ * - Bottom halves are implemented as a linked list. You can have as many
+ *   of them as you want.
+ * - No more scanning of a bit field is required upon calling a bottom half.
+ * - Support for chained bottom half lists. The run_task_queue() function can be
+ *   used as a bottom half handler. This is for example useful for bottom
+ *   halves that want to be delayed until the next clock tick.
+ *
+ * Notes:
+ * - Bottom halves are called in the reverse of the order in which they were
+ *   linked into the list.
+ */
+
+struct tq_struct {
+ struct list_head list; /* linked list of active bh's */
+ unsigned long sync; /* must be initialized to zero */
+ void (*routine)(void *); /* function to call */
+ void *data; /* argument to function */
+};
+
+/*
+ * Emit code to initialise a tq_struct's routine and data pointers
+ */
+#define PREPARE_TQUEUE(_tq, _routine, _data) \
+ do { \
+ (_tq)->routine = _routine; \
+ (_tq)->data = _data; \
+ } while (0)
+
+/*
+ * Emit code to initialise all of a tq_struct
+ */
+#define INIT_TQUEUE(_tq, _routine, _data) \
+ do { \
+ INIT_LIST_HEAD(&(_tq)->list); \
+ (_tq)->sync = 0; \
+ PREPARE_TQUEUE((_tq), (_routine), (_data)); \
+ } while (0)
+
+typedef struct list_head task_queue;
+
+#define DECLARE_TASK_QUEUE(q) LIST_HEAD(q)
+#define TQ_ACTIVE(q) (!list_empty(&q))
+
+extern task_queue tq_timer, tq_immediate, tq_disk;
+
+/*
+ * To implement your own list of active bottom halves, use the following
+ * two definitions:
+ *
+ * DECLARE_TASK_QUEUE(my_tqueue);
+ * struct tq_struct my_task = {
+ * routine: (void (*)(void *)) my_routine,
+ * data: &my_data
+ * };
+ *
+ * To activate a bottom half on a list, use:
+ *
+ * queue_task(&my_task, &my_tqueue);
+ *
+ * To later run the queued tasks use
+ *
+ * run_task_queue(&my_tqueue);
+ *
+ * This allows you to do deferred processing. For example, you could
+ * have a task queue called tq_timer, which is executed within the timer
+ * interrupt.
+ */
+
+extern spinlock_t tqueue_lock;
+
+/*
+ * Queue a task on a tq. Return non-zero if it was successfully
+ * added.
+ */
+static inline int queue_task(struct tq_struct *bh_pointer, task_queue *bh_list)
+{
+ int ret = 0;
+ if (!test_and_set_bit(0,&bh_pointer->sync)) {
+ unsigned long flags;
+ spin_lock_irqsave(&tqueue_lock, flags);
+ list_add_tail(&bh_pointer->list, bh_list);
+ spin_unlock_irqrestore(&tqueue_lock, flags);
+ ret = 1;
+ }
+ return ret;
+}
+
+/*
+ * Call all "bottom halves" on a given list.
+ */
+
+extern void __run_task_queue(task_queue *list);
+
+static inline void run_task_queue(task_queue *list)
+{
+ if (TQ_ACTIVE(*list))
+ __run_task_queue(list);
+}
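+
+/*
+ * Illustrative usage sketch, not part of the original header; my_handler and
+ * my_data are hypothetical:
+ *
+ *     static DECLARE_TASK_QUEUE(my_tqueue);
+ *     static struct tq_struct my_task;
+ *
+ *     INIT_TQUEUE(&my_task, my_handler, &my_data);
+ *     queue_task(&my_task, &my_tqueue);     returns 1 if newly queued
+ *     ...
+ *     run_task_queue(&my_tqueue);           runs and empties the list
+ */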
+
+#endif /* _LINUX_TQUEUE_H */
diff --git a/xen/include/xeno/types.h b/xen/include/xeno/types.h
new file mode 100644
index 0000000000..c5f8d5586d
--- /dev/null
+++ b/xen/include/xeno/types.h
@@ -0,0 +1,50 @@
+#ifndef __TYPES_H__
+#define __TYPES_H__
+
+#include <asm/types.h>
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+#define INT_MAX ((int)(~0U>>1))
+#define INT_MIN (-INT_MAX - 1)
+#define UINT_MAX (~0U)
+#define LONG_MAX ((long)(~0UL>>1))
+#define LONG_MIN (-LONG_MAX - 1)
+#define ULONG_MAX (~0UL)
+
+typedef unsigned int size_t;
+
+/* bsd */
+typedef unsigned char u_char;
+typedef unsigned short u_short;
+typedef unsigned int u_int;
+typedef unsigned long u_long;
+
+/* sysv */
+typedef unsigned char unchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+typedef __u8 u_int8_t;
+typedef __s8 int8_t;
+typedef __u16 u_int16_t;
+typedef __s16 int16_t;
+typedef __u32 u_int32_t;
+typedef __s32 int32_t;
+
+#endif /* !(__BIT_TYPES_DEFINED__) */
+
+typedef __u8 uint8_t;
+typedef __u16 uint16_t;
+typedef __u32 uint32_t;
+typedef __u64 uint64_t;
+
+#endif /* __TYPES_H__ */
diff --git a/xen/include/xeno/vif.h b/xen/include/xeno/vif.h
new file mode 100644
index 0000000000..7b56caaabe
--- /dev/null
+++ b/xen/include/xeno/vif.h
@@ -0,0 +1,94 @@
+/* vif.h
+ *
+ * This is the hypervisor end of the network code. The net_ring structure
+ * stored in each vif is placed on a shared page to interact with the guest VM.
+ *
+ * Copyright (c) 2002, A K Warfield and K A Fraser
+ */
+
+/* virtual network interface struct and associated defines. */
+/* net_vif_st is the larger struct that describes a virtual network interface.
+ * It contains a pointer to the net_ring_t structure, which needs to be on a
+ * shared page between the hypervisor and the guest. The vif struct is private
+ * to the hypervisor and is used primarily as a container to allow routing
+ * and interface administration. This definition should eventually be moved
+ * to a non-shared interface file, as it is of no relevance to the guest.
+ */
+
+#include <hypervisor-ifs/network.h>
+#include <xeno/skbuff.h>
+
+/*
+ * shadow ring structures are used to protect the descriptors from
+ * tampering after they have been passed to the hypervisor.
+ *
+ * TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h.
+ */
+
+typedef struct rx_shadow_entry_st {
+ unsigned long addr;
+ unsigned short size;
+ unsigned short status;
+ unsigned long flush_count;
+} rx_shadow_entry_t;
+
+typedef struct tx_shadow_entry_st {
+ void *header;
+ unsigned long payload;
+ unsigned short size;
+ unsigned short status;
+} tx_shadow_entry_t;
+
+typedef struct net_shadow_ring_st {
+ rx_shadow_entry_t *rx_ring;
+ tx_shadow_entry_t *tx_ring;
+
+ /*
+ * Private copy of producer. Follows guest OS version, but never
+ * catches up with our consumer index.
+ */
+ unsigned int rx_prod;
+ /* Points at next buffer to be filled by NIC. Chases rx_prod. */
+ unsigned int rx_idx;
+ /* Points at next buffer to be returned to the guest OS. Chases rx_idx. */
+ unsigned int rx_cons;
+
+ /*
+ * Private copy of producer. Follows guest OS version, but never
+ * catches up with our consumer index.
+ */
+ unsigned int tx_prod;
+ /* Points at next buffer to be scheduled. Chases tx_prod. */
+ unsigned int tx_idx;
+ /* Points at next buffer to be returned to the guest OS. Chases tx_idx. */
+ unsigned int tx_cons;
+} net_shadow_ring_t;
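+
+/*
+ * Illustrative invariant sketch, not part of the original header. In each
+ * direction the three indices move forward modulo the ring size and chase
+ * one another:
+ *
+ *     rx_cons <= rx_idx <= rx_prod   (mod RX_RING_SIZE)
+ *
+ * e.g. the hypervisor fills the buffer at rx_idx and then advances it with
+ * rx_idx = (rx_idx + 1) & (RX_RING_SIZE - 1), never passing rx_prod.
+ */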
+
+typedef struct net_vif_st {
+ net_ring_t *net_ring;
+ net_shadow_ring_t *shadow_ring;
+ int id;
+ struct task_struct *domain;
+ struct list_head list;
+} net_vif_t;
+
+/* VIF-related defines. */
+#define MAX_GUEST_VIFS 2 // each VIF is a small overhead in task_struct
+#define MAX_SYSTEM_VIFS 256
+
+/* vif globals */
+extern int sys_vif_count;
+extern net_vif_t *sys_vif_list[];
+
+/* vif prototypes */
+net_vif_t *create_net_vif(int domain);
+void destroy_net_vif(struct task_struct *p);
+void add_default_net_rule(int vif_id, u32 ipaddr);
+int __net_get_target_vif(u8 *data, unsigned int len, int src_vif);
+
+#define net_get_target_vif(skb) __net_get_target_vif(skb->data, skb->len, skb->src_vif)
+/* status fields per-descriptor:
+ */
+
+
diff --git a/xen/net/Makefile b/xen/net/Makefile
new file mode 100644
index 0000000000..e9a8eba3d7
--- /dev/null
+++ b/xen/net/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o network.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/net/dev.c b/xen/net/dev.c
new file mode 100644
index 0000000000..c42e516686
--- /dev/null
+++ b/xen/net/dev.c
@@ -0,0 +1,2019 @@
+/*
+ * NET3 Protocol independent device support routines.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/delay.h>
+#include <linux/lib.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/brlock.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <linux/event.h>
+#include <asm/domain_page.h>
+#include <asm/pgalloc.h>
+
+#define BUG_TRAP ASSERT
+#define notifier_call_chain(_a,_b,_c) ((void)0)
+#define rtmsg_ifinfo(_a,_b,_c) ((void)0)
+#define rtnl_lock() ((void)0)
+#define rtnl_unlock() ((void)0)
+
+#if 1
+#define DPRINTK(_f, _a...) printk(_f , ## _a)
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1))
+#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1))
+#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
+#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
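+
+/*
+ * Illustrative aside (not from the patch itself): these macros rely on the
+ * ring sizes being powers of two, so masking with SIZE-1 is a cheap modulo.
+ * For example, if TX_RING_SIZE were 256:
+ *
+ *     TX_RING_INC(255)    == 0
+ *     TX_RING_ADD(250, 8) == 2
+ */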
+
+struct net_device *the_dev = NULL;
+
+/*
+ * Transmitted packets are fragmented, so we can copy the important headers
+ * before checking them for validity. This avoids the need for page protection.
+ */
+/* Ethernet + IP headers */
+#define PKT_PROT_LEN (ETH_HLEN + 20)
+static kmem_cache_t *net_header_cachep;
+
+/**
+ * __dev_get_by_name - find a device by its name
+ * @name: name to find
+ *
+ * Find an interface by name. Must be called under RTNL semaphore
+ * or @dev_base_lock. If the name is found a pointer to the device
+ * is returned. If the name is not found then %NULL is returned. The
+ * reference counters are not incremented so the caller must be
+ * careful with locks.
+ */
+
+
+struct net_device *__dev_get_by_name(const char *name)
+{
+ struct net_device *dev;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ if (strncmp(dev->name, name, IFNAMSIZ) == 0)
+ return dev;
+ }
+ return NULL;
+}
+
+/**
+ * dev_get_by_name - find a device by its name
+ * @name: name to find
+ *
+ * Find an interface by name. This can be called from any
+ * context and does its own locking. The returned handle has
+ * the usage count incremented and the caller must use dev_put() to
+ * release it when it is no longer needed. %NULL is returned if no
+ * matching device is found.
+ */
+
+struct net_device *dev_get_by_name(const char *name)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_name(name);
+ if (dev)
+ dev_hold(dev);
+ read_unlock(&dev_base_lock);
+ return dev;
+}
+
+/**
+ * dev_get - test if a device exists
+ * @name: name to test for
+ *
+ * Test if a name exists. Returns true if the name is found. In order
+ * to be sure the name is not allocated or removed during the test the
+ * caller must hold the rtnl semaphore.
+ *
+ * This function primarily exists for backward compatibility with older
+ * drivers.
+ */
+
+int dev_get(const char *name)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_name(name);
+ read_unlock(&dev_base_lock);
+ return dev != NULL;
+}
+
+/**
+ * __dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns a pointer to the device,
+ * or %NULL if it is not found. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold either the RTNL semaphore
+ * or @dev_base_lock.
+ */
+
+struct net_device * __dev_get_by_index(int ifindex)
+{
+ struct net_device *dev;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ if (dev->ifindex == ifindex)
+ return dev;
+ }
+ return NULL;
+}
+
+
+/**
+ * dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns a pointer to the device,
+ * or NULL if it is not found. The device returned has
+ * had a reference added and the pointer is safe until the user calls
+ * dev_put to indicate they have finished with it.
+ */
+
+struct net_device * dev_get_by_index(int ifindex)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_index(ifindex);
+ if (dev)
+ dev_hold(dev);
+ read_unlock(&dev_base_lock);
+ return dev;
+}
+
+/**
+ * dev_getbyhwaddr - find a device by its hardware address
+ * @type: media type of device
+ * @ha: hardware address
+ *
+ * Search for an interface by MAC address. Returns a pointer to the device,
+ * or NULL if it is not found. The caller must hold the
+ * rtnl semaphore. The returned device has not had its ref count increased
+ * and the caller must therefore be careful about locking.
+ *
+ * BUGS:
+ * If the API was consistent this would be __dev_get_by_hwaddr
+ */
+
+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+{
+ struct net_device *dev;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ if (dev->type == type &&
+ memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
+ return dev;
+ }
+ return NULL;
+}
+
+/**
+ * dev_alloc_name - allocate a name for a device
+ * @dev: device
+ * @name: name format string
+ *
+ * Passed a format string, e.g. "lt%d", it will try to find a suitable
+ * id. Not efficient for many devices, and not called a lot. The caller
+ * must hold the dev_base or rtnl lock while allocating the name and
+ * adding the device in order to avoid duplicates. Returns the number
+ * of the unit assigned or a negative errno code.
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+ int i;
+ char buf[32];
+ char *p;
+
+ /*
+ * Verify the string as this thing may have come from
+ * the user. There must be either one "%d" and no other "%"
+ * characters, or no "%" characters at all.
+ */
+ p = strchr(name, '%');
+ if (p && (p[1] != 'd' || strchr(p+2, '%')))
+ return -EINVAL;
+
+ /*
+ * If you need over 100 please also fix the algorithm...
+ */
+ for (i = 0; i < 100; i++) {
+ snprintf(buf,sizeof(buf),name,i);
+ if (__dev_get_by_name(buf) == NULL) {
+ strcpy(dev->name, buf);
+ return i;
+ }
+ }
+ return -ENFILE; /* Over 100 of the things .. bail out! */
+}
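+
+/*
+ * Illustrative sketch, not from the patch itself. Given hypothetical existing
+ * devices "eth0" and "eth1":
+ *
+ *     err = dev_alloc_name(dev, "eth%d");
+ *
+ * sets dev->name to "eth2" and returns 2, or a negative errno on failure.
+ */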
+
+/**
+ * dev_alloc - allocate a network device and name
+ * @name: name format string
+ * @err: error return pointer
+ *
+ * Passed a format string, e.g. "lt%d", it will allocate a network device
+ * and space for the name. %NULL is returned if no memory is available.
+ * If the allocation succeeds then the name is assigned and the
+ * device pointer returned. %NULL is returned if the name allocation
+ * failed. The cause of an error is returned as a negative errno code
+ * in the variable that @err points to.
+ *
+ * The caller must hold the @dev_base or RTNL locks when doing this in
+ * order to avoid duplicate name allocations.
+ */
+
+struct net_device *dev_alloc(const char *name, int *err)
+{
+ struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
+ if (dev == NULL) {
+ *err = -ENOBUFS;
+ return NULL;
+ }
+ memset(dev, 0, sizeof(struct net_device));
+ *err = dev_alloc_name(dev, name);
+ if (*err < 0) {
+ kfree(dev);
+ return NULL;
+ }
+ return dev;
+}
+
+/**
+ * netdev_state_change - device changes state
+ * @dev: device to cause notification
+ *
+ * Called to indicate a device has changed state. This function calls
+ * the notifier chains for netdev_chain and sends a NEWLINK message
+ * to the routing socket.
+ */
+
+void netdev_state_change(struct net_device *dev)
+{
+ if (dev->flags&IFF_UP) {
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+ }
+}
+
+
+#ifdef CONFIG_KMOD
+
+/**
+ * dev_load - load a network module
+ * @name: name of interface
+ *
+ * If a network interface is not present and the process has suitable
+ * privileges this function loads the module. If module loading is not
+ * available in this kernel then it becomes a nop.
+ */
+
+void dev_load(const char *name)
+{
+ if (!dev_get(name) && capable(CAP_SYS_MODULE))
+ request_module(name);
+}
+
+#else
+
+extern inline void dev_load(const char *unused){;}
+
+#endif
+
+static int default_rebuild_header(struct sk_buff *skb)
+{
+ printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
+ skb->dev ? skb->dev->name : "NULL!!!");
+ kfree_skb(skb);
+ return 1;
+}
+
+/**
+ * dev_open - prepare an interface for use.
+ * @dev: device to open
+ *
+ * Takes a device from down to up state. The device's private open
+ * function is invoked and then the multicast lists are loaded. Finally
+ * the device is moved into the up state and a %NETDEV_UP message is
+ * sent to the netdev notifier chain.
+ *
+ * Calling this function on an active interface is a nop. On a failure
+ * a negative errno code is returned.
+ */
+
+int dev_open(struct net_device *dev)
+{
+ int ret = 0;
+
+ /*
+ * Is it already up?
+ */
+
+ if (dev->flags&IFF_UP)
+ return 0;
+
+ /*
+ * Is it even present?
+ */
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ /*
+ * Call device private open method
+ */
+ if (try_inc_mod_count(dev->owner)) {
+ if (dev->open) {
+ ret = dev->open(dev);
+ if (ret != 0 && dev->owner)
+ __MOD_DEC_USE_COUNT(dev->owner);
+ }
+ } else {
+ ret = -ENODEV;
+ }
+
+ /*
+ * If it went open OK then:
+ */
+
+ if (ret == 0)
+ {
+ /*
+ * Set the flags.
+ */
+ dev->flags |= IFF_UP;
+
+ set_bit(__LINK_STATE_START, &dev->state);
+
+ /*
+ * Initialize multicasting status
+ */
+ dev_mc_upload(dev);
+
+ /*
+ * Wakeup transmit queue engine
+ */
+ dev_activate(dev);
+
+ /*
+ * ... and announce new interface.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+ }
+ return(ret);
+}
+
+
+/**
+ * dev_close - shutdown an interface.
+ * @dev: device to shutdown
+ *
+ * This function moves an active device into down state. A
+ * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ * chain.
+ */
+
+int dev_close(struct net_device *dev)
+{
+ if (!(dev->flags&IFF_UP))
+ return 0;
+
+ /*
+ * Tell people we are going down, so that they can
+     * prepare for its death while the device is still operating.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+
+ dev_deactivate(dev);
+
+ clear_bit(__LINK_STATE_START, &dev->state);
+
+ /*
+ * Call the device specific close. This cannot fail.
+ * Only if device is UP
+ *
+ * We allow it to be called even after a DETACH hot-plug
+ * event.
+ */
+
+ if (dev->stop)
+ dev->stop(dev);
+
+ /*
+ * Device is now down.
+ */
+
+ dev->flags &= ~IFF_UP;
+
+ /*
+ * Tell people we are down
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+
+ /*
+ * Drop the module refcount
+ */
+ if (dev->owner)
+ __MOD_DEC_USE_COUNT(dev->owner);
+
+ return(0);
+}
+
+
+#ifdef CONFIG_HIGHMEM
+/* Actually, we should eliminate this check as soon as we know that:
+ * 1. An IOMMU is present and allows mapping all the memory.
+ * 2. No high memory really exists on this machine.
+ */
+
+static inline int
+illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+{
+ int i;
+
+ if (dev->features&NETIF_F_HIGHDMA)
+ return 0;
+
+ for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
+ if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
+ return 1;
+
+ return 0;
+}
+#else
+#define illegal_highdma(dev, skb) (0)
+#endif
+
+
+/*=======================================================================
+ Receiver routines
+ =======================================================================*/
+
+struct netif_rx_stats netdev_rx_stat[NR_CPUS];
+
+void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
+{
+ net_shadow_ring_t *shadow_ring;
+ rx_shadow_entry_t *rx;
+ unsigned long *g_pte;
+ struct pfn_info *g_pfn, *h_pfn;
+ unsigned int i;
+
+ memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
+ if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
+ {
+ memset(skb->nh.raw + 18, 0, ETH_ALEN);
+ }
+ shadow_ring = vif->shadow_ring;
+
+ if ( (i = shadow_ring->rx_idx) == shadow_ring->rx_prod )
+ {
+ return;
+ }
+
+ if ( shadow_ring->rx_ring[i].status != RING_STATUS_OK )
+ {
+ DPRINTK("Bad buffer in deliver_packet()\n");
+ shadow_ring->rx_idx = RX_RING_INC(i);
+ return;
+ }
+
+ rx = shadow_ring->rx_ring + i;
+ if ( (skb->len + ETH_HLEN) < rx->size )
+ rx->size = skb->len + ETH_HLEN;
+
+ g_pte = map_domain_mem(rx->addr);
+
+ g_pfn = frame_table + (*g_pte >> PAGE_SHIFT);
+ h_pfn = skb->pf;
+
+ h_pfn->tot_count = h_pfn->type_count = 1;
+ g_pfn->tot_count = g_pfn->type_count = 0;
+ h_pfn->flags = g_pfn->flags & (~PG_type_mask);
+
+ if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page;
+ g_pfn->flags = 0;
+
+ /* Point the guest at the new machine frame. */
+ machine_to_phys_mapping[h_pfn - frame_table]
+ = machine_to_phys_mapping[g_pfn - frame_table];
+ *g_pte = (*g_pte & ~PAGE_MASK)
+ | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
+ *g_pte |= _PAGE_PRESENT;
+
+ unmap_domain_mem(g_pte);
+
+ /* Our skbuff now points at the guest's old frame. */
+ skb->pf = g_pfn;
+
+ shadow_ring->rx_idx = RX_RING_INC(i);
+}
+
+/**
+ * netif_rx - post buffer to the network code
+ * @skb: buffer to post
+ *
+ * This function receives a packet from a device driver and queues it for
+ * the upper (protocol) levels to process. It always succeeds. The buffer
+ * may be dropped during processing for congestion control or by the
+ * protocol layers.
+ *
+ * return values:
+ * NET_RX_SUCCESS (no congestion)
+ * NET_RX_DROP (packet was dropped)
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+    /* cpu_mask is set by mark_hyp_event() below, so it is needed on UP too. */
+    unsigned long cpu_mask;
+
+ struct task_struct *p;
+ int this_cpu = smp_processor_id();
+ unsigned long flags;
+ net_vif_t *vif;
+
+ local_irq_save(flags);
+
+ ASSERT(skb->skb_type == SKB_ZERO_COPY);
+ ASSERT((skb->data - skb->head) == (18 + ETH_HLEN));
+
+ skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
+
+ /*
+     * Remapping this address really screws up all the skb pointers. We
+     * need to map them all here sufficiently to get the packet
+     * demultiplexed. This remapping happens more than once in the code and
+     * is grim. It will be fixed in a later update -- drivers should be
+ * able to align the packet arbitrarily.
+ */
+
+ skb->data = skb->head;
+ skb_reserve(skb,18); /* 18 is the 16 from dev_alloc_skb plus 2 for
+ IP header alignment. */
+ skb->mac.raw = skb->data;
+ skb->data += ETH_HLEN;
+ skb->nh.raw = skb->data;
+
+ netdev_rx_stat[this_cpu].total++;
+
+ if ( skb->src_vif == VIF_UNKNOWN_INTERFACE )
+ skb->src_vif = VIF_PHYSICAL_INTERFACE;
+
+ if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE )
+ skb->dst_vif = __net_get_target_vif(skb->mac.raw,
+ skb->len, skb->src_vif);
+
+ if ( (vif = sys_vif_list[skb->dst_vif]) == NULL )
+ goto drop;
+
+ /*
+ * This lock-and-walk of the task list isn't really necessary, and is
+ * an artifact of the old code. The vif contains a pointer to the skb
+ * list we are going to queue the packet in, so the lock and the inner
+ * loop could be removed. The argument against this is a possible race
+ * in which a domain is killed as packets are being delivered to it.
+ * This would result in the dest vif vanishing before we can deliver to
+ * it.
+ */
+
+ if ( skb->dst_vif >= VIF_PHYSICAL_INTERFACE )
+ {
+ read_lock(&tasklist_lock);
+ p = &idle0_task;
+ do {
+ if ( p != vif->domain ) continue;
+ deliver_packet(skb, vif);
+ cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX);
+ read_unlock(&tasklist_lock);
+ goto found;
+ }
+ while ( (p = p->next_task) != &idle0_task );
+ read_unlock(&tasklist_lock);
+ goto drop;
+ }
+
+ drop:
+ netdev_rx_stat[this_cpu].dropped++;
+ unmap_domain_mem(skb->head);
+ kfree_skb(skb);
+ local_irq_restore(flags);
+ return NET_RX_DROP;
+
+ found:
+ unmap_domain_mem(skb->head);
+ skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
+ kfree_skb(skb);
+ hyp_event_notify(cpu_mask);
+ local_irq_restore(flags);
+ return NET_RX_SUCCESS;
+}
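+
+/*
+ * Illustrative sketch of the driver-side calling convention, not from the
+ * patch itself. A NIC driver handing over a zero-copy skb might do:
+ *
+ *     skb->skb_type = SKB_ZERO_COPY;
+ *     skb->src_vif  = VIF_UNKNOWN_INTERFACE;    defaulted to physical i/f
+ *     skb->dst_vif  = VIF_UNKNOWN_INTERFACE;    demultiplexed above
+ *     (void)netif_rx(skb);                      NET_RX_SUCCESS or NET_RX_DROP
+ */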
+
+
+/*************************************************************
+ * NEW TRANSMIT SCHEDULER
+ */
+
+struct list_head net_schedule_list;
+spinlock_t net_schedule_list_lock;
+
+static int __on_net_schedule_list(net_vif_t *vif)
+{
+ return vif->list.next != NULL;
+}
+
+static void remove_from_net_schedule_list(net_vif_t *vif)
+{
+ unsigned long flags;
+ if ( !__on_net_schedule_list(vif) ) return;
+ spin_lock_irqsave(&net_schedule_list_lock, flags);
+ if ( __on_net_schedule_list(vif) )
+ {
+ list_del(&vif->list);
+ vif->list.next = NULL;
+ }
+ spin_unlock_irqrestore(&net_schedule_list_lock, flags);
+}
+
+static void add_to_net_schedule_list_tail(net_vif_t *vif)
+{
+ unsigned long flags;
+ if ( __on_net_schedule_list(vif) ) return;
+ spin_lock_irqsave(&net_schedule_list_lock, flags);
+ if ( !__on_net_schedule_list(vif) )
+ {
+ list_add_tail(&vif->list, &net_schedule_list);
+ }
+ spin_unlock_irqrestore(&net_schedule_list_lock, flags);
+}
+
+
+/* Destructor function for tx skbs. */
+static void tx_skb_release(struct sk_buff *skb)
+{
+ int i;
+ net_vif_t *vif = sys_vif_list[skb->src_vif];
+
+ for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
+ put_page_tot(skb_shinfo(skb)->frags[i].page);
+
+ if ( skb->skb_type == SKB_NODATA )
+ kmem_cache_free(net_header_cachep, skb->head);
+
+ skb_shinfo(skb)->nr_frags = 0;
+
+ /* This would mean that the guest OS has fiddled with our index. */
+ if ( vif->shadow_ring->tx_cons != vif->net_ring->tx_cons )
+ DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
+ vif->shadow_ring->tx_cons, vif->net_ring->tx_cons);
+
+ /*
+ * XXX This assumes that, per vif, SKBs are processed in-order!
+ * Also assumes no concurrency. This is safe because each vif
+ * maps to one NIC. This is executed in NIC interrupt code, so we have
+ * mutual exclusion from do_IRQ().
+ */
+ vif->shadow_ring->tx_cons = TX_RING_INC(vif->shadow_ring->tx_cons);
+ vif->net_ring->tx_cons = vif->shadow_ring->tx_cons;
+ if ( vif->net_ring->tx_cons == vif->net_ring->tx_event )
+ set_bit(_EVENT_NET_TX,
+ &sys_vif_list[skb->src_vif]->domain->shared_info->events);
+}
+
+
+static void net_tx_action(unsigned long unused)
+{
+ struct net_device *dev = the_dev;
+ struct list_head *ent;
+ struct sk_buff *skb;
+ net_vif_t *vif;
+ tx_shadow_entry_t *tx;
+ int pending_bytes = 0, pending_bytes_max = 1;
+
+ spin_lock(&dev->xmit_lock);
+ while ( !netif_queue_stopped(dev) &&
+ (pending_bytes < pending_bytes_max) &&
+ !list_empty(&net_schedule_list) )
+ {
+ /* Get a vif from the list with work to do. */
+ ent = net_schedule_list.next;
+ vif = list_entry(ent, net_vif_t, list);
+ remove_from_net_schedule_list(vif);
+ if ( vif->shadow_ring->tx_idx == vif->shadow_ring->tx_prod )
+ continue;
+
+ /* Check the chosen entry is good. */
+ tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
+ if ( tx->status != RING_STATUS_OK ) goto skip_desc;
+
+ if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
+ {
+ add_to_net_schedule_list_tail(vif);
+ printk("Out of memory in net_tx_action()!\n");
+ goto out;
+ }
+
+ skb->destructor = tx_skb_release;
+
+ skb->head = skb->data = tx->header;
+ skb->end = skb->tail = skb->head + PKT_PROT_LEN;
+
+ skb->dev = the_dev;
+ skb->src_vif = vif->id;
+ skb->dst_vif = VIF_PHYSICAL_INTERFACE;
+ skb->mac.raw = skb->data;
+
+ skb_shinfo(skb)->frags[0].page = frame_table +
+ (tx->payload >> PAGE_SHIFT);
+ skb_shinfo(skb)->frags[0].size = tx->size - PKT_PROT_LEN;
+ skb_shinfo(skb)->frags[0].page_offset = tx->payload & ~PAGE_MASK;
+ skb_shinfo(skb)->nr_frags = 1;
+
+ skb->data_len = tx->size - PKT_PROT_LEN;
+ skb->len = tx->size;
+
+ /* Transmit should always work, or the queue would be stopped. */
+ if ( dev->hard_start_xmit(skb, dev) != 0 )
+ {
+ add_to_net_schedule_list_tail(vif);
+ printk("Weird failure in hard_start_xmit!\n");
+ goto out;
+ }
+
+ skip_desc:
+ vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
+ if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
+ add_to_net_schedule_list_tail(vif);
+ }
+ out:
+ spin_unlock(&dev->xmit_lock);
+}
+
+DECLARE_TASKLET_DISABLED(net_tx_tasklet, net_tx_action, 0);
+
+
+/*
+ * update_shared_ring(void)
+ *
+ * This replaces flush_rx_queue as the guest event handler to move packets
+ * queued in the guest ring up to the guest. Really, the packet is already
+ * there (it was page-flipped in deliver_packet); this just moves the ring
+ * descriptor across from the shadow ring and increments the pointers.
+ */
+
+void update_shared_ring(void)
+{
+ rx_shadow_entry_t *rx;
+ shared_info_t *s = current->shared_info;
+ net_ring_t *net_ring;
+ net_shadow_ring_t *shadow_ring;
+ unsigned int nvif;
+
+ clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
+
+ for ( nvif = 0; nvif < current->num_net_vifs; nvif++ )
+ {
+ net_ring = current->net_vif_list[nvif]->net_ring;
+ shadow_ring = current->net_vif_list[nvif]->shadow_ring;
+
+ /* This would mean that the guest OS has fiddled with our index. */
+ if ( shadow_ring->rx_cons != net_ring->rx_cons )
+ DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
+ shadow_ring->rx_cons, net_ring->rx_cons);
+
+ while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
+ {
+ rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
+ copy_to_user(net_ring->rx_ring + net_ring->rx_cons, rx,
+ sizeof(rx_entry_t));
+
+ if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
+ __flush_tlb();
+
+ shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
+
+ if ( shadow_ring->rx_cons == net_ring->rx_event )
+ set_bit(_EVENT_NET_RX, &s->events);
+ }
+ net_ring->rx_cons = shadow_ring->rx_cons;
+ }
+}
+
+
+/*
+ * We need this ioctl for efficient implementation of the
+ * if_indextoname() function required by the IPv6 API. Without
+ * it, we would have to search all the interfaces to find a
+ * match. --pb
+ */
+
+static int dev_ifname(struct ifreq *arg)
+{
+ struct net_device *dev;
+ struct ifreq ifr;
+
+ /*
+ * Fetch the caller's info block.
+ */
+
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_index(ifr.ifr_ifindex);
+ if (!dev) {
+ read_unlock(&dev_base_lock);
+ return -ENODEV;
+ }
+
+ strcpy(ifr.ifr_name, dev->name);
+ read_unlock(&dev_base_lock);
+
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return 0;
+}
+
+
+/**
+ * netdev_set_master - set up master/slave pair
+ * @slave: slave device
+ * @master: new master device
+ *
+ * Changes the master device of the slave. Pass %NULL to break the
+ * bonding. The caller must hold the RTNL semaphore. On a failure
+ * a negative errno code is returned. On success the reference counts
+ * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
+ * function returns zero.
+ */
+
+int netdev_set_master(struct net_device *slave, struct net_device *master)
+{
+ struct net_device *old = slave->master;
+
+ if (master) {
+ if (old)
+ return -EBUSY;
+ dev_hold(master);
+ }
+
+ br_write_lock_bh(BR_NETPROTO_LOCK);
+ slave->master = master;
+ br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+ if (old)
+ dev_put(old);
+
+ if (master)
+ slave->flags |= IFF_SLAVE;
+ else
+ slave->flags &= ~IFF_SLAVE;
+
+ rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
+ return 0;
+}
+
+/**
+ * dev_set_promiscuity - update promiscuity count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove promiscuity from a device. While the count in the device
+ * remains above zero the interface remains promiscuous. Once it hits zero
+ * the device reverts to normal filtering operation. A negative inc
+ * value is used to drop promiscuity on the device.
+ */
+
+void dev_set_promiscuity(struct net_device *dev, int inc)
+{
+ unsigned short old_flags = dev->flags;
+
+ dev->flags |= IFF_PROMISC;
+ if ((dev->promiscuity += inc) == 0)
+ dev->flags &= ~IFF_PROMISC;
+ if (dev->flags^old_flags) {
+#ifdef CONFIG_NET_FASTROUTE
+ if (dev->flags&IFF_PROMISC) {
+ netdev_fastroute_obstacles++;
+ dev_clear_fastroute(dev);
+ } else
+ netdev_fastroute_obstacles--;
+#endif
+ dev_mc_upload(dev);
+ printk(KERN_INFO "device %s %s promiscuous mode\n",
+ dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
+ }
+}
+
+/**
+ * dev_set_allmulti - update allmulti count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove reception of all multicast frames on a device. While the
+ * count in the device remains above zero the interface keeps receiving
+ * all multicast frames. Once it hits zero the device reverts to normal
+ * filtering operation. A negative @inc value is used to drop the counter
+ * when releasing a resource needing all multicasts.
+ */
+
+void dev_set_allmulti(struct net_device *dev, int inc)
+{
+ unsigned short old_flags = dev->flags;
+
+ dev->flags |= IFF_ALLMULTI;
+ if ((dev->allmulti += inc) == 0)
+ dev->flags &= ~IFF_ALLMULTI;
+ if (dev->flags^old_flags)
+ dev_mc_upload(dev);
+}
+
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+ int ret;
+ int old_flags = dev->flags;
+
+ /*
+ * Set the flags on our device.
+ */
+
+ dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
+ IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
+ (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
+
+ /*
+ * Load in the correct multicast list now the flags have changed.
+ */
+
+ dev_mc_upload(dev);
+
+ /*
+     * Have we downed the interface? We handle IFF_UP ourselves
+ * according to user attempts to set it, rather than blindly
+ * setting it.
+ */
+
+ ret = 0;
+ if ((old_flags^flags)&IFF_UP) /* Bit is different ? */
+ {
+ ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+
+ if (ret == 0)
+ dev_mc_upload(dev);
+ }
+
+ if (dev->flags&IFF_UP &&
+ ((old_flags^dev->flags)&
+ ~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+
+ if ((flags^dev->gflags)&IFF_PROMISC) {
+ int inc = (flags&IFF_PROMISC) ? +1 : -1;
+ dev->gflags ^= IFF_PROMISC;
+ dev_set_promiscuity(dev, inc);
+ }
+
+ /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
+       is important. Some (broken) drivers set IFF_PROMISC when
+       IFF_ALLMULTI is requested, without asking us and without reporting it.
+ */
+ if ((flags^dev->gflags)&IFF_ALLMULTI) {
+ int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
+ dev->gflags ^= IFF_ALLMULTI;
+ dev_set_allmulti(dev, inc);
+ }
+
+ if (old_flags^dev->flags)
+ rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);
+
+ return ret;
+}
+
+/*
+ * Perform the SIOCxIFxxx calls.
+ */
+
+static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+{
+ struct net_device *dev;
+ int err;
+
+ if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
+ return -ENODEV;
+
+ switch(cmd)
+ {
+ case SIOCGIFFLAGS: /* Get interface flags */
+ ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
+ |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
+ if (netif_running(dev) && netif_carrier_ok(dev))
+ ifr->ifr_flags |= IFF_RUNNING;
+ return 0;
+
+ case SIOCSIFFLAGS: /* Set interface flags */
+ return dev_change_flags(dev, ifr->ifr_flags);
+
+ case SIOCGIFMETRIC: /* Get the metric on the interface */
+ ifr->ifr_metric = 0;
+ return 0;
+
+ case SIOCSIFMETRIC: /* Set the metric on the interface */
+ return -EOPNOTSUPP;
+
+ case SIOCGIFMTU: /* Get the MTU of a device */
+ ifr->ifr_mtu = dev->mtu;
+ return 0;
+
+ case SIOCSIFMTU: /* Set the MTU of a device */
+ if (ifr->ifr_mtu == dev->mtu)
+ return 0;
+
+ /*
+ * MTU must be positive.
+ */
+
+ if (ifr->ifr_mtu<0)
+ return -EINVAL;
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ if (dev->change_mtu)
+ err = dev->change_mtu(dev, ifr->ifr_mtu);
+ else {
+ dev->mtu = ifr->ifr_mtu;
+ err = 0;
+ }
+ if (!err && dev->flags&IFF_UP)
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
+ return err;
+
+ case SIOCGIFHWADDR:
+ memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
+ ifr->ifr_hwaddr.sa_family=dev->type;
+ return 0;
+
+ case SIOCSIFHWADDR:
+ if (dev->set_mac_address == NULL)
+ return -EOPNOTSUPP;
+ if (ifr->ifr_hwaddr.sa_family!=dev->type)
+ return -EINVAL;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
+ if (!err)
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+ return err;
+
+ case SIOCSIFHWBROADCAST:
+ if (ifr->ifr_hwaddr.sa_family!=dev->type)
+ return -EINVAL;
+ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+ return 0;
+
+ case SIOCGIFMAP:
+ ifr->ifr_map.mem_start=dev->mem_start;
+ ifr->ifr_map.mem_end=dev->mem_end;
+ ifr->ifr_map.base_addr=dev->base_addr;
+ ifr->ifr_map.irq=dev->irq;
+ ifr->ifr_map.dma=dev->dma;
+ ifr->ifr_map.port=dev->if_port;
+ return 0;
+
+ case SIOCSIFMAP:
+ if (dev->set_config) {
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ return dev->set_config(dev,&ifr->ifr_map);
+ }
+ return -EOPNOTSUPP;
+
+ case SIOCADDMULTI:
+ if (dev->set_multicast_list == NULL ||
+ ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+ return -EINVAL;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
+ return 0;
+
+ case SIOCDELMULTI:
+ if (dev->set_multicast_list == NULL ||
+ ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
+ return -EINVAL;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
+ return 0;
+
+ case SIOCGIFINDEX:
+ ifr->ifr_ifindex = dev->ifindex;
+ return 0;
+
+ case SIOCSIFNAME:
+ if (dev->flags&IFF_UP)
+ return -EBUSY;
+ if (__dev_get_by_name(ifr->ifr_newname))
+ return -EEXIST;
+ memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
+ dev->name[IFNAMSIZ-1] = 0;
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ return 0;
+
+#ifdef WIRELESS_EXT
+ case SIOCGIWSTATS:
+ return dev_iwstats(dev, ifr);
+#endif /* WIRELESS_EXT */
+
+ /*
+ * Unknown or private ioctl
+ */
+
+ default:
+ if ((cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15) ||
+ cmd == SIOCBONDENSLAVE ||
+ cmd == SIOCBONDRELEASE ||
+ cmd == SIOCBONDSETHWADDR ||
+ cmd == SIOCBONDSLAVEINFOQUERY ||
+ cmd == SIOCBONDINFOQUERY ||
+ cmd == SIOCBONDCHANGEACTIVE ||
+ cmd == SIOCETHTOOL ||
+ cmd == SIOCGMIIPHY ||
+ cmd == SIOCGMIIREG ||
+ cmd == SIOCSMIIREG) {
+ if (dev->do_ioctl) {
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ return dev->do_ioctl(dev, ifr, cmd);
+ }
+ return -EOPNOTSUPP;
+ }
+
+#ifdef WIRELESS_EXT
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ if (dev->do_ioctl) {
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ return dev->do_ioctl(dev, ifr, cmd);
+ }
+ return -EOPNOTSUPP;
+ }
+#endif /* WIRELESS_EXT */
+
+ }
+ return -EINVAL;
+}
+
+/*
+ * This function handles all "interface"-type I/O control requests. The actual
+ * 'doing' part of this is dev_ifsioc above.
+ */
+
+/**
+ * dev_ioctl - network device ioctl
+ * @cmd: command to issue
+ * @arg: pointer to a struct ifreq in user space
+ *
+ * Issue ioctl functions to devices. This is normally called by the
+ * user space syscall interfaces but can sometimes be useful for
+ * other purposes. The return value is the return from the syscall if
+ * positive or a negative errno code on error.
+ */
+
+int dev_ioctl(unsigned int cmd, void *arg)
+{
+ struct ifreq ifr;
+ int ret;
+ char *colon;
+
+    /* One special case: SIOCGIFCONF takes an ifconf argument
+       and requires a shared lock, because it sleeps while writing
+       to user space.
+ */
+
+ if (cmd == SIOCGIFCONF) {
+ return -ENOSYS;
+ }
+ if (cmd == SIOCGIFNAME) {
+ return dev_ifname((struct ifreq *)arg);
+ }
+
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
+
+ ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+ colon = strchr(ifr.ifr_name, ':');
+ if (colon)
+ *colon = 0;
+
+ /*
+ * See which interface the caller is talking about.
+ */
+
+ switch(cmd)
+ {
+ /*
+ * These ioctl calls:
+ * - can be done by all.
+ * - atomic and do not require locking.
+ * - return a value
+ */
+
+ case SIOCGIFFLAGS:
+ case SIOCGIFMETRIC:
+ case SIOCGIFMTU:
+ case SIOCGIFHWADDR:
+ case SIOCGIFSLAVE:
+ case SIOCGIFMAP:
+ case SIOCGIFINDEX:
+ dev_load(ifr.ifr_name);
+ read_lock(&dev_base_lock);
+ ret = dev_ifsioc(&ifr, cmd);
+ read_unlock(&dev_base_lock);
+ if (!ret) {
+ if (colon)
+ *colon = ':';
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
+ return ret;
+
+ /*
+ * These ioctl calls:
+ * - require superuser power.
+ * - require strict serialization.
+ * - return a value
+ */
+
+ case SIOCETHTOOL:
+ case SIOCGMIIPHY:
+ case SIOCGMIIREG:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ dev_load(ifr.ifr_name);
+ dev_probe_lock();
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ dev_probe_unlock();
+ if (!ret) {
+ if (colon)
+ *colon = ':';
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
+ return ret;
+
+ /*
+ * These ioctl calls:
+ * - require superuser power.
+ * - require strict serialization.
+ * - do not return a value
+ */
+
+ case SIOCSIFFLAGS:
+ case SIOCSIFMETRIC:
+ case SIOCSIFMTU:
+ case SIOCSIFMAP:
+ case SIOCSIFHWADDR:
+ case SIOCSIFSLAVE:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ case SIOCSIFHWBROADCAST:
+ case SIOCSIFNAME:
+ case SIOCSMIIREG:
+ case SIOCBONDENSLAVE:
+ case SIOCBONDRELEASE:
+ case SIOCBONDSETHWADDR:
+ case SIOCBONDSLAVEINFOQUERY:
+ case SIOCBONDINFOQUERY:
+ case SIOCBONDCHANGEACTIVE:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ dev_load(ifr.ifr_name);
+ dev_probe_lock();
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ dev_probe_unlock();
+ return ret;
+
+ case SIOCGIFMEM:
+ /* Get the per device memory space. We can add this but currently
+ do not support it */
+ case SIOCSIFMEM:
+ /* Set the per device memory buffer space. */
+ case SIOCSIFLINK:
+ return -EINVAL;
+
+ /*
+ * Unknown or private ioctl.
+ */
+
+ default:
+ if (cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15) {
+ dev_load(ifr.ifr_name);
+ dev_probe_lock();
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ dev_probe_unlock();
+ if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return ret;
+ }
+#ifdef WIRELESS_EXT
+ /* Take care of Wireless Extensions */
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ /* If command is `set a parameter', or
+ * `get the encoding parameters', check if
+ * the user has the right to do it */
+ if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ }
+ dev_load(ifr.ifr_name);
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ if (!ret && IW_IS_GET(cmd) &&
+ copy_to_user(arg, &ifr,
+ sizeof(struct ifreq)))
+ return -EFAULT;
+ return ret;
+ }
+#endif /* WIRELESS_EXT */
+ return -EINVAL;
+ }
+}
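+
+/*
+ * Illustrative sketch of a caller, not from the patch itself: the syscall
+ * layer passes the command and the user-space struct ifreq pointer straight
+ * through, e.g.
+ *
+ *     ret = dev_ioctl(SIOCGIFMTU, (void *)user_ifreq_ptr);
+ *
+ * where user_ifreq_ptr is a hypothetical user-space pointer; the result is
+ * 0 on success or a negative errno.
+ */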
+
+
+/**
+ * dev_new_index - allocate an ifindex
+ *
+ * Returns a suitable unique value for a new device interface
+ * number. The caller must hold the rtnl semaphore or the
+ * dev_base_lock to be sure it remains unique.
+ */
+
+int dev_new_index(void)
+{
+ static int ifindex;
+ for (;;) {
+ if (++ifindex <= 0)
+ ifindex=1;
+ if (__dev_get_by_index(ifindex) == NULL)
+ return ifindex;
+ }
+}
+
+static int dev_boot_phase = 1;
+
+/**
+ * register_netdevice - register a network device
+ * @dev: device to register
+ *
+ * Take a completed network device structure and add it to the kernel
+ * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ * chain. 0 is returned on success. A negative errno code is returned
+ * on a failure to set up the device, or if the name is a duplicate.
+ *
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking. You may want
+ * register_netdev() instead of this.
+ *
+ * BUGS:
+ * The locking appears insufficient to guarantee two parallel registers
+ * will not get the same name.
+ */
+
+int net_dev_init(void);
+
+int register_netdevice(struct net_device *dev)
+{
+ struct net_device *d, **dp;
+#ifdef CONFIG_NET_DIVERT
+ int ret;
+#endif
+
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+ dev->xmit_lock_owner = -1;
+#ifdef CONFIG_NET_FASTROUTE
+ dev->fastpath_lock=RW_LOCK_UNLOCKED;
+#endif
+
+ if (dev_boot_phase)
+ net_dev_init();
+
+#ifdef CONFIG_NET_DIVERT
+ ret = alloc_divert_blk(dev);
+ if (ret)
+ return ret;
+#endif /* CONFIG_NET_DIVERT */
+
+ dev->iflink = -1;
+
+ /* Init, if this function is available */
+ if (dev->init && dev->init(dev) != 0) {
+#ifdef CONFIG_NET_DIVERT
+ free_divert_blk(dev);
+#endif
+ return -EIO;
+ }
+
+ dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+
+ /* Check for existence, and append to tail of chain */
+ for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
+ if (d == dev || strcmp(d->name, dev->name) == 0) {
+#ifdef CONFIG_NET_DIVERT
+ free_divert_blk(dev);
+#endif
+ return -EEXIST;
+ }
+ }
+ /*
+     * Install a nil rebuild_header routine; it should never be called
+     * and is used just as a bug trap.
+ */
+
+ if (dev->rebuild_header == NULL)
+ dev->rebuild_header = default_rebuild_header;
+
+ /*
+     * Default initial state at registration is that the
+ * device is present.
+ */
+
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+ dev->next = NULL;
+ dev_init_scheduler(dev);
+ write_lock_bh(&dev_base_lock);
+ *dp = dev;
+ dev_hold(dev);
+ dev->deadbeaf = 0;
+ write_unlock_bh(&dev_base_lock);
+
+    /* Notify protocols that a new device has appeared. */
+ notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+
+ return 0;
+}
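+
+/*
+ * Illustrative registration sequence, not from the patch itself; my_dev_init
+ * and the "xen%d" name format are hypothetical:
+ *
+ *     int err;
+ *     struct net_device *dev = dev_alloc("xen%d", &err);
+ *     if (dev != NULL) {
+ *         dev->init = my_dev_init;
+ *         rtnl_lock();
+ *         err = register_netdevice(dev);
+ *         rtnl_unlock();
+ *     }
+ */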
+
+/**
+ * netdev_finish_unregister - complete unregistration
+ * @dev: device
+ *
+ * Destroy and free a dead device. A value of zero is returned on
+ * success.
+ */
+
+int netdev_finish_unregister(struct net_device *dev)
+{
+ BUG_TRAP(dev->ip_ptr==NULL);
+ BUG_TRAP(dev->ip6_ptr==NULL);
+ BUG_TRAP(dev->dn_ptr==NULL);
+
+ if (!dev->deadbeaf) {
+ printk(KERN_ERR "Freeing alive device %p, %s\n",
+ dev, dev->name);
+ return 0;
+ }
+#ifdef NET_REFCNT_DEBUG
+ printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
+ (dev->features & NETIF_F_DYNALLOC)?"":", old style");
+#endif
+ if (dev->destructor)
+ dev->destructor(dev);
+ if (dev->features & NETIF_F_DYNALLOC)
+ kfree(dev);
+ return 0;
+}
+
+/**
+ * unregister_netdevice - remove device from the kernel
+ * @dev: device
+ *
+ * This function shuts down a device interface and removes it
+ * from the kernel tables. On success 0 is returned, on a failure
+ * a negative errno code is returned.
+ *
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking. You may want
+ * unregister_netdev() instead of this.
+ */
+
+int unregister_netdevice(struct net_device *dev)
+{
+ unsigned long now, warning_time;
+ struct net_device *d, **dp;
+
+ /* If device is running, close it first. */
+ if (dev->flags & IFF_UP)
+ dev_close(dev);
+
+ BUG_TRAP(dev->deadbeaf==0);
+ dev->deadbeaf = 1;
+
+ /* And unlink it from device chain. */
+ for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
+ if (d == dev) {
+ write_lock_bh(&dev_base_lock);
+ *dp = d->next;
+ write_unlock_bh(&dev_base_lock);
+ break;
+ }
+ }
+ if (d == NULL) {
+ printk(KERN_DEBUG "unregister_netdevice: device %s/%p"
+ " not registered\n", dev->name, dev);
+ return -ENODEV;
+ }
+
+ /* Synchronize to net_rx_action. */
+ br_write_lock_bh(BR_NETPROTO_LOCK);
+ br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+ if (dev_boot_phase == 0) {
+
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+        /* Notify protocols that we are about to destroy
+           this device. They should clean up all of their state.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+
+ /*
+ * Flush the multicast chain
+ */
+ dev_mc_discard(dev);
+ }
+
+ if (dev->uninit)
+ dev->uninit(dev);
+
+ /* Notifier chain MUST detach us from master device. */
+ BUG_TRAP(dev->master==NULL);
+
+#ifdef CONFIG_NET_DIVERT
+ free_divert_blk(dev);
+#endif
+
+ if (dev->features & NETIF_F_DYNALLOC) {
+#ifdef NET_REFCNT_DEBUG
+ if (atomic_read(&dev->refcnt) != 1)
+ printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n",
+ dev->name, atomic_read(&dev->refcnt)-1);
+#endif
+ dev_put(dev);
+ return 0;
+ }
+
+ /* Last reference is our one */
+ if (atomic_read(&dev->refcnt) == 1) {
+ dev_put(dev);
+ return 0;
+ }
+
+#ifdef NET_REFCNT_DEBUG
+ printk("unregister_netdevice: waiting %s refcnt=%d\n",
+ dev->name, atomic_read(&dev->refcnt));
+#endif
+
+ /* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
+ it means that someone in the kernel still has a reference
+ to this device and we cannot release it.
+
+ "New style" devices have destructors, hence we can return from this
+       function and the destructor will do all the work later. As of kernel 2.4.0
+ there are very few "New Style" devices.
+
+ "Old style" devices expect that the device is free of any references
+ upon exit from this function.
+ We cannot return from this function until all such references have
+ fallen away. This is because the caller of this function will probably
+ immediately kfree(*dev) and then be unloaded via sys_delete_module.
+
+ So, we linger until all references fall away. The duration of the
+ linger is basically unbounded! It is driven by, for example, the
+ current setting of sysctl_ipfrag_time.
+
+       After 1 second, we start to rebroadcast unregister notifications
+       in the hope that careless clients will release the device.
+
+ */
+
+ now = warning_time = jiffies;
+ while (atomic_read(&dev->refcnt) != 1) {
+ if ((jiffies - now) > 1*HZ) {
+ /* Rebroadcast unregister notification */
+ notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+ }
+ mdelay(250);
+ if ((jiffies - warning_time) > 10*HZ) {
+ printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
+ "become free. Usage count = %d\n",
+ dev->name, atomic_read(&dev->refcnt));
+ warning_time = jiffies;
+ }
+ }
+ dev_put(dev);
+ return 0;
+}
+
+
+/*
+ * Initialize the DEV module. At boot time this walks the device list and
+ * unhooks any devices that fail to initialise (normally hardware not
+ * present) and leaves us with a valid list of present and active devices.
+ *
+ */
+
+extern void net_device_init(void);
+extern void ip_auto_config(void);
+#ifdef CONFIG_NET_DIVERT
+extern void dv_init(void);
+#endif /* CONFIG_NET_DIVERT */
+
+
+/*
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking.
+ */
+int __init net_dev_init(void)
+{
+ struct net_device *dev, **dp;
+
+ if ( !dev_boot_phase )
+ return 0;
+
+ skb_init();
+
+ net_header_cachep = kmem_cache_create(
+ "net_header_cache",
+ (PKT_PROT_LEN + sizeof(void *) - 1) & ~(sizeof(void *) - 1),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ spin_lock_init(&net_schedule_list_lock);
+ INIT_LIST_HEAD(&net_schedule_list);
+
+ /*
+ * Add the devices.
+ * If the call to dev->init fails, the dev is removed
+     * from the chain, disconnecting the device until the
+ * next reboot.
+ *
+ * NB At boot phase networking is dead. No locking is required.
+ * But we still preserve dev_base_lock for sanity.
+ */
+ dp = &dev_base;
+ while ((dev = *dp) != NULL) {
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+
+ dev->xmit_lock_owner = -1;
+ dev->iflink = -1;
+ dev_hold(dev);
+
+ /*
+ * Allocate name. If the init() fails
+ * the name will be reissued correctly.
+ */
+ if (strchr(dev->name, '%'))
+ dev_alloc_name(dev, dev->name);
+
+ if (dev->init && dev->init(dev)) {
+ /*
+ * It failed to come up. It will be unhooked later.
+             * dev_alloc_name can now advance to the next suitable
+             * name, which is checked next.
+ */
+ dev->deadbeaf = 1;
+ dp = &dev->next;
+ } else {
+ dp = &dev->next;
+ dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+ if (dev->rebuild_header == NULL)
+ dev->rebuild_header = default_rebuild_header;
+ dev_init_scheduler(dev);
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+ }
+ }
+
+ /*
+ * Unhook devices that failed to come up
+ */
+ dp = &dev_base;
+ while ((dev = *dp) != NULL) {
+ if (dev->deadbeaf) {
+ write_lock_bh(&dev_base_lock);
+ *dp = dev->next;
+ write_unlock_bh(&dev_base_lock);
+ dev_put(dev);
+ } else {
+ dp = &dev->next;
+ }
+ }
+
+ dev_boot_phase = 0;
+
+ dev_mcast_init();
+
+ /*
+ * Initialise network devices
+ */
+
+ net_device_init();
+
+ return 0;
+}
+
+inline int init_tx_header(u8 *data, unsigned int len, struct net_device *dev)
+{
+ memcpy(data + ETH_ALEN, dev->dev_addr, ETH_ALEN);
+
+ switch ( ntohs(*(unsigned short *)(data + 12)) )
+ {
+ case ETH_P_ARP:
+ if ( len < 42 ) break;
+ memcpy(data + 22, dev->dev_addr, 6);
+ return ETH_P_ARP;
+ case ETH_P_IP:
+ return ETH_P_IP;
+ }
+ return 0;
+}
+
+
+/*
+ * do_net_update:
+ *
+ * Called from guest OS to notify updates to its transmit and/or receive
+ * descriptor rings.
+ */
+
+long do_net_update(void)
+{
+ net_ring_t *net_ring;
+ net_shadow_ring_t *shadow_ring;
+ net_vif_t *current_vif;
+ unsigned int i, j;
+ struct sk_buff *skb;
+ tx_entry_t tx;
+ rx_shadow_entry_t *rx;
+ unsigned long pfn;
+ struct pfn_info *page;
+ unsigned long *g_pte;
+
+ for ( j = 0; j < current->num_net_vifs; j++)
+ {
+ int target;
+ u8 *g_data;
+ unsigned short protocol;
+
+ current_vif = current->net_vif_list[j];
+ net_ring = current_vif->net_ring;
+ shadow_ring = current_vif->shadow_ring;
+
+ /*
+ * PHASE 1 -- TRANSMIT RING
+ */
+
+ /*
+ * Collect up new transmit buffers. We collect up to the guest OS's
+ * new producer index, but take care not to catch up with our own
+ * consumer index.
+ */
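+ /*
+ * NB. TX_RING_SIZE is a power of two, so (tx_cons-i) & (TX_RING_SIZE-1)
+ * is the distance to our consumer index modulo the ring size; stopping
+ * while it equals 1 leaves one slot unused, so a full ring remains
+ * distinguishable from an empty one.
+ */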
+ for ( i = shadow_ring->tx_prod;
+ (i != net_ring->tx_prod) &&
+ (((shadow_ring->tx_cons-i) & (TX_RING_SIZE-1)) != 1);
+ i = TX_RING_INC(i) )
+ {
+ if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
+ {
+ DPRINTK("Bad copy_from_user for tx net descriptor\n");
+ shadow_ring->tx_ring[i].status = RING_STATUS_ERR_CFU;
+ continue;
+ }
+
+ shadow_ring->tx_ring[i].size = tx.size;
+ shadow_ring->tx_ring[i].status = RING_STATUS_BAD_PAGE;
+
+ if ( tx.size < PKT_PROT_LEN )
+ {
+ DPRINTK("Runt packet %d\n", tx.size);
+ continue;
+ }
+
+ if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE )
+ {
+ DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
+ tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
+ continue;
+ }
+
+ pfn = tx.addr >> PAGE_SHIFT;
+ page = frame_table + pfn;
+ if ( (pfn >= max_page) ||
+ ((page->flags & PG_domain_mask) != current->domain) )
+ {
+ DPRINTK("Bad page frame\n");
+ continue;
+ }
+
+ g_data = map_domain_mem(tx.addr);
+
+ protocol = __constant_htons(
+ init_tx_header(g_data, tx.size, the_dev));
+ if ( protocol == 0 )
+ goto unmap_and_continue;
+
+ target = __net_get_target_vif(g_data, tx.size, current_vif->id);
+
+ if ( target > VIF_PHYSICAL_INTERFACE )
+ {
+ /* Local delivery */
+ if ( (skb = dev_alloc_skb(tx.size)) == NULL )
+ goto unmap_and_continue;
+
+ skb->destructor = tx_skb_release;
+
+ shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+
+ skb->src_vif = current_vif->id;
+ skb->dst_vif = target;
+ skb->protocol = protocol;
+
+ skb->head = (u8 *)map_domain_mem(
+ ((skb->pf - frame_table) << PAGE_SHIFT));
+ skb->data = skb->head + 16;
+ skb_reserve(skb,2);
+ memcpy(skb->data, g_data, tx.size);
+ skb->len = tx.size;
+ unmap_domain_mem(skb->head);
+ skb->data += ETH_HLEN;
+ (void)netif_rx(skb);
+ }
+ else if ( target == VIF_PHYSICAL_INTERFACE )
+ {
+ shadow_ring->tx_ring[i].header =
+ kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
+ if ( shadow_ring->tx_ring[i].header == NULL )
+ goto unmap_and_continue;
+ memcpy(shadow_ring->tx_ring[i].header, g_data, PKT_PROT_LEN);
+ shadow_ring->tx_ring[i].payload = tx.addr + PKT_PROT_LEN;
+ shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+ get_page_tot(page);
+ }
+
+ unmap_and_continue:
+ unmap_domain_mem(g_data);
+ }
+ smp_wmb(); /* Let other CPUs see new descriptors first. */
+ shadow_ring->tx_prod = i;
+
+ /* XXX: This should be more conservative. */
+ add_to_net_schedule_list_tail(current_vif);
+ tasklet_schedule(&net_tx_tasklet);
+
+ /*
+ * PHASE 2 -- RECEIVE RING
+ */
+
+ /*
+ * Collect up new receive buffers. We collect up to the guest OS's
+ * new producer index, but take care not to catch up with our own
+ * consumer index.
+ */
+ for ( i = shadow_ring->rx_prod;
+ (i != net_ring->rx_prod) &&
+ (((shadow_ring->rx_cons-i) & (RX_RING_SIZE-1)) != 1);
+ i = RX_RING_INC(i) )
+ {
+ /*
+ * This copy assumes that rx_shadow_entry_t is an extension of
+ * rx_net_entry_t; any extra fields must be tacked on to the end.
+ */
+ if ( copy_from_user( shadow_ring->rx_ring+i, net_ring->rx_ring+i,
+ sizeof (rx_entry_t) ) )
+ {
+ DPRINTK("Bad copy_from_user for rx ring\n");
+ shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
+ continue;
+ }
+
+ rx = shadow_ring->rx_ring + i;
+ pfn = rx->addr >> PAGE_SHIFT;
+ page = frame_table + pfn;
+
+ shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
+
+ if ( (pfn >= max_page) ||
+ (page->flags != (PGT_l1_page_table | current->domain)) )
+ {
+ DPRINTK("Bad page frame containing ppte\n");
+ continue;
+ }
+
+ g_pte = map_domain_mem(rx->addr);
+
+ if (!(*g_pte & _PAGE_PRESENT))
+ {
+ DPRINTK("Inavlid PTE passed down (not present)\n");
+ unmap_domain_mem(g_pte);
+ continue;
+ }
+
+ page = (*g_pte >> PAGE_SHIFT) + frame_table;
+
+ if (page->tot_count != 1)
+ {
+ DPRINTK("An rx page must be mapped exactly once\n");
+ unmap_domain_mem(g_pte);
+ continue;
+ }
+
+ /* The pte they passed was good, so take it away from them. */
+ shadow_ring->rx_ring[i].status = RING_STATUS_OK;
+ *g_pte &= ~_PAGE_PRESENT;
+ page->flags = (page->flags & ~PG_type_mask) | PGT_net_rx_buf;
+ rx->flush_count = tlb_flush_count[smp_processor_id()];
+
+ unmap_domain_mem(g_pte);
+ }
+ smp_wmb(); /* Let other CPUs see new descriptors first. */
+ shadow_ring->rx_prod = net_ring->rx_prod;
+ }
+ return 0;
+}
+
+
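+/*
+ * Find the physical interface named by opt_ifname (presumably set from
+ * the boot command line), bring it up, and enable the transmit tasklet.
+ * Returns 1 on success, 0 on failure.
+ */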
+int setup_network_devices(void)
+{
+ int ret;
+ extern char opt_ifname[];
+ struct net_device *dev = dev_get_by_name(opt_ifname);
+
+ if ( dev == NULL )
+ {
+ printk("Could not find device %s\n", opt_ifname);
+ return 0;
+ }
+
+ ret = dev_open(dev);
+ if ( ret != 0 )
+ {
+ printk("Error opening device %s for use (%d)\n", opt_ifname, ret);
+ return 0;
+ }
+ printk("Device %s opened and ready for use.\n", opt_ifname);
+ the_dev = dev;
+
+ tasklet_enable(&net_tx_tasklet);
+
+ return 1;
+}
+
diff --git a/xen/net/dev_mcast.c b/xen/net/dev_mcast.c
new file mode 100644
index 0000000000..d7d2ae338d
--- /dev/null
+++ b/xen/net/dev_mcast.c
@@ -0,0 +1,276 @@
+/*
+ * Linux NET3: Multicast List maintenance.
+ *
+ * Authors:
+ * Tim Kordas <tjk@nostromo.eeap.cwru.edu>
+ * Richard Underwood <richard@wuzz.demon.co.uk>
+ *
+ * Stir fried together from the IP multicast and CAP patches above
+ * Alan Cox <Alan.Cox@linux.org>
+ *
+ * Fixes:
+ * Alan Cox : Update the device on a real delete
+ * rather than any time but...
+ * Alan Cox : IFF_ALLMULTI support.
+ * Alan Cox : New format set_multicast_list() calls.
+ * Gleb Natapov : Remove dev_mc_lock.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+//#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+//#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+//#include <linux/proc_fs.h>
+#include <linux/init.h>
+//#include <net/ip.h>
+//#include <net/route.h>
+#include <linux/skbuff.h>
+//#include <net/sock.h>
+//#include <net/arp.h>
+
+
+/*
+ * Device multicast list maintenance.
+ *
+ * This is used both by IP and by the user level maintenance functions.
+ * Unlike BSD we maintain a usage count on a given multicast address so
+ * that a casual user application can add/delete multicasts used by
+ * protocols without doing damage to the protocols when it deletes the
+ * entries. It also helps IP as it tracks overlapping maps.
+ *
+ * Device mc lists are changed by bh at least if IPv6 is enabled,
+ * so that it must be bh protected.
+ *
+ * We block accesses to device mc filters with dev->xmit_lock.
+ */
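+
+/*
+ * For example, if two protocols each dev_mc_add() the same address, the
+ * entry's dmi_users count reaches two and the address is only removed
+ * from the device filter after both have called dev_mc_delete().
+ */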
+
+/*
+ * Update the multicast list into the physical NIC controller.
+ */
+
+static void __dev_mc_upload(struct net_device *dev)
+{
+ /* Don't do anything till we up the interface
+ * [dev_open will call this function so the list will
+ * stay sane]
+ */
+
+ if (!(dev->flags&IFF_UP))
+ return;
+
+ /*
+ * Devices with no set multicast or which have been
+ * detached don't get set.
+ */
+
+ if (dev->set_multicast_list == NULL ||
+ !netif_device_present(dev))
+ return;
+
+ dev->set_multicast_list(dev);
+}
+
+void dev_mc_upload(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+ __dev_mc_upload(dev);
+ spin_unlock_bh(&dev->xmit_lock);
+}
+
+/*
+ * Delete a device level multicast
+ */
+
+int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
+{
+ int err = 0;
+ struct dev_mc_list *dmi, **dmip;
+
+ spin_lock_bh(&dev->xmit_lock);
+
+ for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
+ /*
+ * Find the entry we want to delete. The device could
+ * have variable length entries so check these too.
+ */
+ if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+ alen == dmi->dmi_addrlen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 0;
+ if (old_glbl == 0)
+ break;
+ }
+ if (--dmi->dmi_users)
+ goto done;
+
+ /*
+ * Last user. So delete the entry.
+ */
+ *dmip = dmi->next;
+ dev->mc_count--;
+
+ kfree(dmi);
+
+ /*
+ * We have altered the list, so the card
+ * loaded filter is now wrong. Fix it
+ */
+ __dev_mc_upload(dev);
+
+ spin_unlock_bh(&dev->xmit_lock);
+ return 0;
+ }
+ }
+ err = -ENOENT;
+done:
+ spin_unlock_bh(&dev->xmit_lock);
+ return err;
+}
+
+/*
+ * Add a device level multicast
+ */
+
+int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
+{
+ int err = 0;
+ struct dev_mc_list *dmi, *dmi1;
+
+ dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
+
+ spin_lock_bh(&dev->xmit_lock);
+ for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
+ if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+ dmi->dmi_addrlen == alen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 1;
+ if (old_glbl)
+ goto done;
+ }
+ dmi->dmi_users++;
+ goto done;
+ }
+ }
+
+ if ((dmi = dmi1) == NULL) {
+ spin_unlock_bh(&dev->xmit_lock);
+ return -ENOMEM;
+ }
+ memcpy(dmi->dmi_addr, addr, alen);
+ dmi->dmi_addrlen = alen;
+ dmi->next = dev->mc_list;
+ dmi->dmi_users = 1;
+ dmi->dmi_gusers = glbl ? 1 : 0;
+ dev->mc_list = dmi;
+ dev->mc_count++;
+
+ __dev_mc_upload(dev);
+
+ spin_unlock_bh(&dev->xmit_lock);
+ return 0;
+
+done:
+ spin_unlock_bh(&dev->xmit_lock);
+ if (dmi1)
+ kfree(dmi1);
+ return err;
+}
+
+/*
+ * Discard multicast list when a device is downed
+ */
+
+void dev_mc_discard(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+
+ while (dev->mc_list != NULL) {
+ struct dev_mc_list *tmp = dev->mc_list;
+ dev->mc_list = tmp->next;
+ if (tmp->dmi_users > tmp->dmi_gusers)
+ printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
+ kfree(tmp);
+ }
+ dev->mc_count = 0;
+
+ spin_unlock_bh(&dev->xmit_lock);
+}
+
+#ifdef CONFIG_PROC_FS
+static int dev_mc_read_proc(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ off_t pos = 0, begin = 0;
+ struct dev_mc_list *m;
+ int len = 0;
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ for (dev = dev_base; dev; dev = dev->next) {
+ spin_lock_bh(&dev->xmit_lock);
+ for (m = dev->mc_list; m; m = m->next) {
+ int i;
+
+ len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex,
+ dev->name, m->dmi_users, m->dmi_gusers);
+
+ for (i = 0; i < m->dmi_addrlen; i++)
+ len += sprintf(buffer+len, "%02x", m->dmi_addr[i]);
+
+ len += sprintf(buffer+len, "\n");
+
+ pos = begin + len;
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
+ }
+ if (pos > offset + length) {
+ spin_unlock_bh(&dev->xmit_lock);
+ goto done;
+ }
+ }
+ spin_unlock_bh(&dev->xmit_lock);
+ }
+ *eof = 1;
+
+done:
+ read_unlock(&dev_base_lock);
+ *start = buffer + (offset - begin);
+ len -= (offset - begin);
+ if (len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+#endif
+
+void __init dev_mcast_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ create_proc_read_entry("net/dev_mcast", 0, 0, dev_mc_read_proc, NULL);
+#endif
+}
+
diff --git a/xen/net/devinit.c b/xen/net/devinit.c
new file mode 100644
index 0000000000..f3ce2c39d4
--- /dev/null
+++ b/xen/net/devinit.c
@@ -0,0 +1,109 @@
+/******************************************************************************
+ * devinit.c
+ *
+ * This is the watchdog timer routines, ripped from sch_generic.c
+ * Original copyright notice appears below.
+ *
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Jamal Hadi Salim, <hadi@nortelnetworks.com> 990601
+ * - Ingress support
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/lib.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
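+/*
+ * Timer callback: if the device is present, running and has carrier, but
+ * its queue has been stopped for more than watchdog_timeo jiffies, invoke
+ * the driver's tx_timeout() hook; the timer is then re-armed.
+ */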
+static void dev_watchdog(unsigned long arg)
+{
+ struct net_device *dev = (struct net_device *)arg;
+
+ spin_lock(&dev->xmit_lock);
+ if (netif_device_present(dev) &&
+ netif_running(dev) &&
+ netif_carrier_ok(dev)) {
+ if (netif_queue_stopped(dev) &&
+ (jiffies - dev->trans_start) > dev->watchdog_timeo) {
+ printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name);
+ dev->tx_timeout(dev);
+ }
+ if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+ dev_hold(dev);
+ }
+ spin_unlock(&dev->xmit_lock);
+
+ dev_put(dev);
+}
+
+static void dev_watchdog_init(struct net_device *dev)
+{
+ init_timer(&dev->watchdog_timer);
+ dev->watchdog_timer.data = (unsigned long)dev;
+ dev->watchdog_timer.function = dev_watchdog;
+}
+
+void __netdev_watchdog_up(struct net_device *dev)
+{
+ if (dev->tx_timeout) {
+ if (dev->watchdog_timeo <= 0)
+ dev->watchdog_timeo = 5*HZ;
+ if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+ dev_hold(dev);
+ }
+}
+
+static void dev_watchdog_up(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+ __netdev_watchdog_up(dev);
+ spin_unlock_bh(&dev->xmit_lock);
+}
+
+static void dev_watchdog_down(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+ if (del_timer(&dev->watchdog_timer))
+ __dev_put(dev);
+ spin_unlock_bh(&dev->xmit_lock);
+}
+
+void dev_activate(struct net_device *dev)
+{
+ spin_lock_bh(&dev->queue_lock);
+ dev->trans_start = jiffies;
+ dev_watchdog_up(dev);
+ spin_unlock_bh(&dev->queue_lock);
+}
+
+void dev_deactivate(struct net_device *dev)
+{
+ dev_watchdog_down(dev);
+}
+
+void dev_init_scheduler(struct net_device *dev)
+{
+ dev_watchdog_init(dev);
+}
+
+void dev_shutdown(struct net_device *dev)
+{
+}
diff --git a/xen/net/eth.c b/xen/net/eth.c
new file mode 100644
index 0000000000..5238de022e
--- /dev/null
+++ b/xen/net/eth.c
@@ -0,0 +1,252 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Ethernet-type device handling.
+ *
+ * Version: @(#)eth.c 1.0.7 05/25/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Mark Evans, <evansmp@uhura.aston.ac.uk>
+ * Florian La Roche, <rzsfl@rz.uni-sb.de>
+ * Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *
+ * Fixes:
+ * Mr Linux : Arp problems
+ * Alan Cox : Generic queue tidyup (very tiny here)
+ * Alan Cox : eth_header ntohs should be htons
+ * Alan Cox : eth_rebuild_header missing an htons and
+ * minor other things.
+ * Tegge : Arp bug fixes.
+ * Florian : Removed many unnecessary functions, code cleanup
+ * and changes for new arp and skbuff.
+ * Alan Cox : Redid header building to reflect new format.
+ * Alan Cox : ARP only when compiled with CONFIG_INET
+ * Greg Page : 802.2 and SNAP stuff.
+ * Alan Cox : MAC layer pointers/new format.
+ * Paul Gortmaker : eth_copy_and_sum shouldn't csum padding.
+ * Alan Cox : Protect against forwarding explosions with
+ * older network drivers and IFF_ALLMULTI.
+ * Christer Weinigel : Better rebuild header message.
+ * Andrew Morton : 26Feb01: kill ether_setup() - use netdev_boot_setup().
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+//#include <linux/in.h>
+//#include <linux/inet.h>
+//#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/init.h>
+//#include <net/dst.h>
+//#include <net/arp.h>
+//#include <net/sock.h>
+//#include <net/ipv6.h>
+//#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+//#include <asm/checksum.h>
+
+//extern int __init netdev_boot_setup(char *str);
+
+//__setup("ether=", netdev_boot_setup);
+
+/*
+ * Create the Ethernet MAC header for an arbitrary protocol layer
+ *
+ * saddr=NULL means use device source address
+ * daddr=NULL means leave destination address (eg unresolved arp)
+ */
+
+int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+ void *daddr, void *saddr, unsigned len)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN);
+
+ /*
+ * Set the protocol type. For a packet of type ETH_P_802_3 we put the length
+ * in here instead. It is up to the 802.2 layer to carry protocol information.
+ */
+
+ if(type!=ETH_P_802_3)
+ eth->h_proto = htons(type);
+ else
+ eth->h_proto = htons(len);
+
+ /*
+ * Set the source hardware address.
+ */
+
+ if(saddr)
+ memcpy(eth->h_source,saddr,dev->addr_len);
+ else
+ memcpy(eth->h_source,dev->dev_addr,dev->addr_len);
+
+ /*
+ * Anyway, the loopback-device should never use this function...
+ */
+
+ if (dev->flags & (IFF_LOOPBACK|IFF_NOARP))
+ {
+ memset(eth->h_dest, 0, dev->addr_len);
+ return(dev->hard_header_len);
+ }
+
+ if(daddr)
+ {
+ memcpy(eth->h_dest,daddr,dev->addr_len);
+ return dev->hard_header_len;
+ }
+
+ return -dev->hard_header_len;
+}
+
+
+/*
+ * Rebuild the Ethernet MAC header. This is called after an ARP
+ * (or in future other address resolution) has completed on this
+ * sk_buff. We now let ARP fill in the other fields.
+ *
+ * This routine CANNOT use cached dst->neigh!
+ * Really, it is used only when dst->neigh is wrong.
+ */
+
+int eth_rebuild_header(struct sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ struct net_device *dev = skb->dev;
+
+ switch (eth->h_proto)
+ {
+#ifdef CONFIG_INET
+ case __constant_htons(ETH_P_IP):
+ return arp_find(eth->h_dest, skb);
+#endif
+ default:
+ printk(KERN_DEBUG
+ "%s: unable to resolve type %X addresses.\n",
+ dev->name, (int)eth->h_proto);
+
+ memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+ break;
+ }
+
+ return 0;
+}
+
+
+/*
+ * Determine the packet's protocol ID. The rule here is that we
+ * assume 802.3 if the type field is short enough to be a length.
+ * This is normal practice and works for any 'now in use' protocol.
+ */
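+
+/*
+ * For example, 0x0800 (IPv4) is >= 1536 and so is a protocol ID, while
+ * 0x05DC (1500) can only be an 802.3 frame length.
+ */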
+
+unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ethhdr *eth;
+ unsigned char *rawp;
+
+ if (skb->skb_type == SKB_ZERO_COPY)
+ {
+ skb_pull(skb,dev->hard_header_len);
+ skb->mac.raw= (void *)0xdeadbeef;
+ return htons(ETH_P_802_2);
+
+ } else { /* SKB_NORMAL */
+
+ skb->mac.raw=skb->data;
+ skb_pull(skb,dev->hard_header_len);
+ eth= skb->mac.ethernet;
+
+ if(*eth->h_dest&1)
+ {
+ if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
+ skb->pkt_type=PACKET_BROADCAST;
+ else
+ skb->pkt_type=PACKET_MULTICAST;
+ }
+
+ /*
+ * This ALLMULTI check should be redundant by 1.4
+ * so don't forget to remove it.
+ *
+ * Seems you forgot to remove it. All silly devices
+ * seem to set IFF_PROMISC.
+ */
+
+ else if(1 /*dev->flags&IFF_PROMISC*/)
+ {
+ if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
+ skb->pkt_type=PACKET_OTHERHOST;
+ }
+
+ if (ntohs(eth->h_proto) >= 1536)
+ return eth->h_proto;
+
+ rawp = skb->data;
+
+ /*
+ * This is a magic hack to spot IPX packets. Older Novell breaks
+ * the protocol design and runs IPX over 802.3 without an 802.2 LLC
+ * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+ * won't work for fault tolerant netware but does for the rest.
+ */
+ if (*(unsigned short *)rawp == 0xFFFF)
+ return htons(ETH_P_802_3);
+
+ /*
+ * Real 802.2 LLC
+ */
+ return htons(ETH_P_802_2);
+ }
+}
+
+
+int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
+{
+ struct ethhdr *eth = skb->mac.ethernet;
+ memcpy(haddr, eth->h_source, ETH_ALEN);
+ return ETH_ALEN;
+}
+
+int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
+{
+#if 0
+ unsigned short type = hh->hh_type;
+ struct ethhdr *eth = (struct ethhdr*)(((u8*)hh->hh_data) + 2);
+ struct net_device *dev = neigh->dev;
+
+ if (type == __constant_htons(ETH_P_802_3))
+ return -1;
+
+ eth->h_proto = type;
+ memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+ memcpy(eth->h_dest, neigh->ha, dev->addr_len);
+ hh->hh_len = ETH_HLEN;
+#endif
+ return 0;
+}
+
+/*
+ * Called by Address Resolution module to notify changes in address.
+ */
+
+void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
+{
+ memcpy(((u8*)hh->hh_data) + 2, haddr, dev->addr_len);
+}
diff --git a/xen/net/skbuff.c b/xen/net/skbuff.c
new file mode 100644
index 0000000000..695a6f6b63
--- /dev/null
+++ b/xen/net/skbuff.c
@@ -0,0 +1,501 @@
+/*
+ * Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
+ * Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ * Version: $Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $
+ *
+ * Fixes:
+ * Alan Cox : Fixed the worst of the load balancer bugs.
+ * Dave Platt : Interrupt stacking fix.
+ * Richard Kooijman : Timestamp fixes.
+ * Alan Cox : Changed buffer format.
+ * Alan Cox : destructor hook for AF_UNIX etc.
+ * Linus Torvalds : Better skb_clone.
+ * Alan Cox : Added skb_copy.
+ * Alan Cox : Added all the changed routines Linus
+ * only put in the headers
+ * Ray VanTassle : Fixed --skb->lock in free
+ * Alan Cox : skb_copy copy arp field
+ * Andi Kleen : slabified it.
+ *
+ * NOTE:
+ * The __skb_ routines should be called with interrupts
+ * disabled, or you better be *real* sure that the operation is atomic
+ * with respect to whatever list is being frobbed (e.g. via lock_sock()
+ * or via disabling bottom half handlers, etc).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#define BUG_TRAP ASSERT
+
+int sysctl_hot_list_len = 128;
+
+static kmem_cache_t *skbuff_head_cache;
+
+static union {
+ struct sk_buff_head list;
+ char pad[SMP_CACHE_BYTES];
+} skb_head_pool[NR_CPUS];
+
+/*
+ * Keep out-of-line to prevent kernel bloat.
+ * __builtin_return_address is not used because it is not always
+ * reliable.
+ */
+
+/**
+ * skb_over_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_put(). Not user callable.
+ */
+
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+ printk("skput:over: %p:%d put:%d dev:%s",
+ here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ BUG();
+}
+
+/**
+ * skb_under_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_push(). Not user callable.
+ */
+
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+ printk("skput:under: %p:%d put:%d dev:%s",
+ here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ BUG();
+}
+
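+/*
+ * Per-CPU hot lists of free sk_buff heads: up to sysctl_hot_list_len
+ * heads are recycled via these lists before falling back to the slab
+ * cache. Interrupts are disabled around the queue operations because the
+ * lists are also used from interrupt context.
+ */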
+static __inline__ struct sk_buff *skb_head_from_pool(void)
+{
+ struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
+
+ if (skb_queue_len(list)) {
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ skb = __skb_dequeue(list);
+ local_irq_restore(flags);
+ return skb;
+ }
+ return NULL;
+}
+
+static __inline__ void skb_head_to_pool(struct sk_buff *skb)
+{
+ struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
+
+ if (skb_queue_len(list) < sysctl_hot_list_len) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __skb_queue_head(list, skb);
+ local_irq_restore(flags);
+
+ return;
+ }
+ kmem_cache_free(skbuff_head_cache, skb);
+}
+
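+/*
+ * Take a page off the free list to back a zero-copy skb. The pfn_info is
+ * remembered in skb->pf so dealloc_skb_data_page() can return the page;
+ * the value returned here is the page's physical address.
+ */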
+static inline u8 *alloc_skb_data_page(struct sk_buff *skb)
+{
+ struct list_head *list_ptr;
+ struct pfn_info *pf;
+ unsigned long flags;
+
+ spin_lock_irqsave(&free_list_lock, flags);
+
+ if (!free_pfns) {
+ /* Don't leak the lock on the empty-list path. */
+ spin_unlock_irqrestore(&free_list_lock, flags);
+ return NULL;
+ }
+
+ list_ptr = free_list.next;
+ pf = list_entry(list_ptr, struct pfn_info, list);
+ pf->flags = 0; /* owned by dom0 */
+ list_del(&pf->list);
+ free_pfns--;
+
+ spin_unlock_irqrestore(&free_list_lock, flags);
+
+ skb->pf = pf;
+ return (u8 *)((pf - frame_table) << PAGE_SHIFT);
+}
+
+static inline void dealloc_skb_data_page(struct sk_buff *skb)
+{
+ struct pfn_info *pf;
+ unsigned long flags;
+
+ pf = skb->pf;
+
+ spin_lock_irqsave(&free_list_lock, flags);
+
+ list_add(&pf->list, &free_list);
+ free_pfns++;
+
+ spin_unlock_irqrestore(&free_list_lock, flags);
+}
+
+static inline void INTERRUPT_CHECK(int gfp_mask)
+{
+ if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
+ printk(KERN_ERR "alloc_skb called nonatomically\n");
+ BUG();
+ }
+}
+
+
+/**
+ * alloc_skb - allocate a network buffer
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ *
+ * Allocate a new &sk_buff. The returned buffer has no headroom and a
+ * tail room of size bytes. The object has a reference count of one.
+ * The return is the buffer. On a failure the return is %NULL.
+ *
+ * Buffers may only be allocated from interrupts using a @gfp_mask of
+ * %GFP_ATOMIC.
+ */
+
+struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
+{
+ struct sk_buff *skb;
+ u8 *data;
+
+ INTERRUPT_CHECK(gfp_mask);
+
+ /* Get the HEAD */
+ skb = skb_head_from_pool();
+ if (skb == NULL) {
+ skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+ if (skb == NULL)
+ goto nohead;
+ }
+
+ /* Get the DATA. Size must match skb_add_mtu(). */
+ size = SKB_DATA_ALIGN(size);
+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ if (data == NULL)
+ goto nodata;
+
+ /* Load the data pointers. */
+ skb->head = data;
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
+
+ /* Set up other state */
+ skb->len = 0;
+ skb->data_len = 0;
+ skb->src_vif = VIF_UNKNOWN_INTERFACE;
+ skb->dst_vif = VIF_UNKNOWN_INTERFACE;
+ skb->skb_type = SKB_NORMAL;
+
+ skb_shinfo(skb)->nr_frags = 0;
+ return skb;
+
+ nodata:
+ skb_head_to_pool(skb);
+ nohead:
+ return NULL;
+}
+
+
+struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
+{
+ struct sk_buff *skb;
+ u8 *data;
+
+ INTERRUPT_CHECK(gfp_mask);
+
+ /* Get the HEAD */
+ skb = skb_head_from_pool();
+ if (skb == NULL) {
+ skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+ if (skb == NULL)
+ goto nohead;
+ }
+
+ /* Get the DATA. Size must match skb_add_mtu(). */
+ size = SKB_DATA_ALIGN(size);
+ data = alloc_skb_data_page(skb);
+
+ if (data == NULL)
+ goto nodata;
+
+ /* A FAKE virtual address, so that pci_map_xxx does the right thing. */
+ data = phys_to_virt((unsigned long)data);
+
+ /* Load the data pointers. */
+ skb->head = data;
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
+
+ /* Set up other state */
+ skb->len = 0;
+ skb->data_len = 0;
+ skb->src_vif = VIF_UNKNOWN_INTERFACE;
+ skb->dst_vif = VIF_UNKNOWN_INTERFACE;
+ skb->skb_type = SKB_ZERO_COPY;
+
+ skb_shinfo(skb)->nr_frags = 0;
+
+ return skb;
+
+ nodata:
+ skb_head_to_pool(skb);
+ nohead:
+ return NULL;
+}
+
+
+struct sk_buff *alloc_skb_nodata(int gfp_mask)
+{
+ struct sk_buff *skb;
+
+ INTERRUPT_CHECK(gfp_mask);
+
+ /* Get the HEAD */
+ skb = skb_head_from_pool();
+ if (skb == NULL) {
+ skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+ if (skb == NULL)
+ return NULL;
+ }
+
+ skb->skb_type = SKB_NODATA;
+ return skb;
+}
+
+
+/*
+ * Slab constructor for a skb head.
+ */
+static inline void skb_headerinit(void *p, kmem_cache_t *cache,
+ unsigned long flags)
+{
+ struct sk_buff *skb = p;
+
+ skb->next = NULL;
+ skb->prev = NULL;
+ skb->list = NULL;
+ skb->dev = NULL;
+ skb->pkt_type = PACKET_HOST; /* Default type */
+ skb->ip_summed = 0;
+ skb->destructor = NULL;
+}
+
+static void skb_release_data(struct sk_buff *skb)
+{
+ if (skb_shinfo(skb)->nr_frags) BUG();
+
+ switch ( skb->skb_type )
+ {
+ case SKB_NORMAL:
+ kfree(skb->head);
+ break;
+ case SKB_ZERO_COPY:
+ dealloc_skb_data_page(skb);
+ break;
+ case SKB_NODATA:
+ break;
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Free an skbuff by memory without cleaning the state.
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+ skb_release_data(skb);
+ skb_head_to_pool(skb);
+}
+
+/**
+ * __kfree_skb - private function
+ * @skb: buffer
+ *
+ * Free an sk_buff. Release anything attached to the buffer.
+ * Clean the state. This is an internal helper function. Users should
+ * always call kfree_skb
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+ if ( skb->list )
+ panic(KERN_WARNING "Warning: kfree_skb passed an skb still "
+ "on a list (from %p).\n", NET_CALLER(skb));
+
+ if ( skb->destructor )
+ skb->destructor(skb);
+
+ skb_headerinit(skb, NULL, 0); /* clean state */
+ kfree_skbmem(skb);
+}
+
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+ /*
+ * Shift between the two data areas in bytes
+ */
+ unsigned long offset = new->data - old->data;
+
+ new->list=NULL;
+ new->dev=old->dev;
+ new->protocol=old->protocol;
+ new->h.raw=old->h.raw+offset;
+ new->nh.raw=old->nh.raw+offset;
+ new->mac.raw=old->mac.raw+offset;
+ new->pkt_type=old->pkt_type;
+ new->destructor = NULL;
+}
+
+/**
+ * skb_copy - create private copy of an sk_buff
+ * @skb: buffer to copy
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an &sk_buff and its data. This is used when the
+ * caller wishes to modify the data and needs a private copy of the
+ * data to alter. Returns %NULL on failure or the pointer to the buffer
+ * on success. The returned buffer has a reference count of 1.
+ *
+ * As by-product this function converts non-linear &sk_buff to linear
+ * one, so that &sk_buff becomes completely private and caller is allowed
+ * to modify all the data of returned buffer. This means that this
+ * function is not recommended for use in circumstances when only
+ * header is going to be modified. Use pskb_copy() instead.
+ */
+
+struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+{
+ struct sk_buff *n;
+ int headerlen = skb->data-skb->head;
+
+ /*
+ * Allocate the copy buffer
+ */
+ n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+ if(n==NULL)
+ return NULL;
+
+ /* Set the data pointer */
+ skb_reserve(n,headerlen);
+ /* Set the tail pointer and length */
+ skb_put(n,skb->len);
+ n->csum = skb->csum;
+ n->ip_summed = skb->ip_summed;
+
+ if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
+ BUG();
+
+ copy_skb_header(n, skb);
+
+ return n;
+}
+
+/* Copy some data bits from skb to kernel buffer. */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+ int i, copy;
+ int start = skb->len - skb->data_len;
+
+ if (offset > (int)skb->len-len)
+ goto fault;
+
+ /* Copy header. */
+ if ((copy = start-offset) > 0) {
+ if (copy > len)
+ copy = len;
+ memcpy(to, skb->data + offset, copy);
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset+len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end-offset) > 0) {
+ u8 *vaddr;
+
+ if (copy > len)
+ copy = len;
+
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+ memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
+ offset-start, copy);
+ kunmap_skb_frag(vaddr);
+
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ }
+ start = end;
+ }
+
+ if (len == 0)
+ return 0;
+
+ fault:
+ return -EFAULT;
+}
+
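+/* Create the sk_buff head cache and initialise each CPU's hot list. */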
+void __init skb_init(void)
+{
+ int i;
+
+ skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+ sizeof(struct sk_buff),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ skb_headerinit, NULL);
+ if (!skbuff_head_cache)
+ panic("cannot create skbuff cache");
+
+ for (i=0; i<NR_CPUS; i++)
+ skb_queue_head_init(&skb_head_pool[i].list);
+}
diff --git a/xen/tools/Makefile b/xen/tools/Makefile
new file mode 100644
index 0000000000..ccf535aa49
--- /dev/null
+++ b/xen/tools/Makefile
@@ -0,0 +1,6 @@
+
+elf-reloc: elf-reloc.c
+ gcc -O2 -Wall -o $@ $<
+
+clean:
+ rm -f elf-reloc *~ core
diff --git a/xen/tools/elf-reloc.c b/xen/tools/elf-reloc.c
new file mode 100644
index 0000000000..19a839ee84
--- /dev/null
+++ b/xen/tools/elf-reloc.c
@@ -0,0 +1,118 @@
+/******************************************************************************
+ * elf-reloc.c
+ *
+ * Usage: elf-reloc <old base> <new base> <image>
+ *
+ * Relocates <image> from <old base> address to <new base> address by
+ * frobbing the Elf headers. Segment contents are unmodified!
+ */
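+
+/*
+ * Example invocation (hypothetical addresses):
+ *   elf-reloc 0xfc400000 0xfc500000 image
+ * adds 0x100000 to e_entry and to each phdr's p_vaddr and p_paddr.
+ */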
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef unsigned long Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned long Elf32_Off;
+typedef unsigned long Elf32_Word;
+
+typedef struct {
+ unsigned char e_ident[16];
+ Elf32_Half e_type;
+ Elf32_Half e_machine;
+ Elf32_Word e_version;
+ Elf32_Addr e_entry;
+ Elf32_Off e_phoff;
+ Elf32_Off e_shoff;
+ Elf32_Word e_flags;
+ Elf32_Half e_ehsize;
+ Elf32_Half e_phentsize;
+ Elf32_Half e_phnum;
+ Elf32_Half e_shentsize;
+ Elf32_Half e_shnum;
+ Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct {
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+} Elf32_Phdr;
+
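+/* NB. Argument order is (field, type), the reverse of the standard offsetof(). */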
+#define offsetof(_f,_p) ((unsigned long)&(((_p *)0)->_f))
+
+
+/* Add @reloc_distance to address at offset @off in file @fp. */
+void reloc(FILE *fp, long off, unsigned long reloc_distance)
+{
+ unsigned long base;
+ fseek(fp, off, SEEK_SET);
+ fread(&base, sizeof(base), 1, fp);
+ base += reloc_distance;
+ fseek(fp, off, SEEK_SET);
+ fwrite(&base, sizeof(base), 1, fp);
+}
+
+
+int main(int argc, char **argv)
+{
+ unsigned long old_base, new_base, reloc_distance;
+ long virt_section, phys_section;
+ char *image_name;
+ FILE *fp;
+ Elf32_Off phoff;
+ Elf32_Half phnum, phentsz;
+ int i;
+
+ if ( argc != 4 )
+ {
+ fprintf(stderr, "Usage: elf-reloc <old base> <new base> <image>\n");
+ return(1);
+ }
+
+ old_base = strtoul(argv[1], NULL, 16);
+ new_base = strtoul(argv[2], NULL, 16);
+ image_name = argv[3];
+
+ printf("Relocating `%s' from 0x%08lX to 0x%08lX\n",
+ image_name, old_base, new_base);
+
+ fp = fopen(image_name, "rb+");
+ if ( !fp )
+ {
+ fprintf(stderr, "Failed to load image!\n");
+ return(1);
+ }
+
+ reloc_distance = new_base - old_base;
+
+ /* First frob the entry address. */
+ reloc(fp, offsetof(e_entry, Elf32_Ehdr), reloc_distance);
+
+ fseek(fp, offsetof(e_phoff, Elf32_Ehdr), SEEK_SET);
+ fread(&phoff, sizeof(phoff), 1, fp);
+ fseek(fp, offsetof(e_phnum, Elf32_Ehdr), SEEK_SET);
+ fread(&phnum, sizeof(phnum), 1, fp);
+ fseek(fp, offsetof(e_phentsize, Elf32_Ehdr), SEEK_SET);
+ fread(&phentsz, sizeof(phentsz), 1, fp);
+
+ virt_section = (long)phoff + offsetof(p_vaddr, Elf32_Phdr);
+ phys_section = (long)phoff + offsetof(p_paddr, Elf32_Phdr);
+ for ( i = 0; i < phnum; i++ )
+ {
+ reloc(fp, phys_section, reloc_distance);
+ reloc(fp, virt_section, reloc_distance);
+ phys_section += phentsz;
+ virt_section += phentsz;
+ }
+
+ fclose(fp);
+
+ return(0);
+}